LLaMA-Factory-310P3/generate_frontend_data_npu.py

140 lines
6.4 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import csv
import json
import os
import shutil
# CSV file with the summary metrics; opened and parsed by generate_display_results().
NV_RESULT_PATH = r"C:\Users\wengq\Desktop\QY项目\前端\910_result.csv"
# Directory whose sub-directories are scanned by get_detail_folder_path().
RESULTS_PATH = r"C:\Users\wengq\Desktop\QY项目\910b_results\final"
# NOTE(review): MODEL_FOLDER and MODEL_PREFIX are not referenced by any code
# visible in this file — confirm whether they are still needed.
MODEL_FOLDER = ["9g-8B", "Qwen-7B", "Llama2-7B", "ChatGLM2-6B", "Baichuan2-7B"]
MODEL_PREFIX = ["Qwen", "llama2", "ChatGLM2", "Baichuan2"]
# Maps a model folder name to a model prefix. Only referenced from the
# commented-out inference export in this file; note it has no "9g-8B" entry.
MODEL_FOLDER_PREFIX = {"Qwen-7B": "Qwen", "Llama2-7B": "llama2", "ChatGLM2-6B": "ChatGLM2", "Baichuan2-7B": "Baichuan2"}
# Number of CSV lines each run needs: column[0] .. column[8] are read below.
_RECORD_FIELD_COUNT = 9


def _read_result_columns(csv_path):
    """Read the results CSV and return it transposed, one list per column.

    The file is laid out with one metric per line and one test run per
    comma-separated position, so column ``i`` collects the i-th value of
    every line. Short lines are padded with ``""`` so that index ``k`` in a
    column always refers to line ``k`` of the file.
    """
    rows = []
    # "utf-8-sig" removes a leading byte-order mark only when one is present;
    # newline='' is what the csv module expects from the file object.
    with open(csv_path, 'r', encoding='utf-8-sig', newline='') as file:
        for row in csv.reader(file):
            print(row)
            if row:  # csv.reader yields [] for blank lines
                rows.append(row)
    width = max((len(row) for row in rows), default=0)
    return [
        [row[i] if i < len(row) else "" for row in rows]
        for i in range(width)
    ]


def _build_detail_dict(column):
    """Map one CSV column (one test run) to the dict written to display_results.json."""
    return {
        "模型名称": column[1],
        "测试环境": column[0],
        "单卡多卡": column[2],
        "测试方法": "Lora微调",
        "数据集": "belle_1m",
        "框架": "PyTorch",
        "硬件配置": "CPU:4*鲲鹏920处理器48核2.6GHz \n内存2TB32 * 64GB3200MHz DDR4内存 \n系统盘2 * 480GB SATA SSD3 * 1.92TB NVMe SSD \n阵列卡9560-8i/8口/4G/12Gb带电池保护模块 \n网络18*200GE-QSFP56 含200GBase-SR4光模块-QSFP56-200G-多模模块) \n网络2: 平台集成4个千兆电口 \n昇腾训练模组:昇腾训练模组*8每一路AI处理器提供7条HCCS互连链路提供最大392GB/s带宽能力。单颗芯片显存容量为64GB \n电源服务器白金2600W 2.0版本交流电源*4 \n4U静态滑轨套件",
        "训练时长": column[3],
        "单步时长": column[4],
        "吞吐量": column[5],
        "单卡最大显存占用量": column[6],
        "训练功耗": column[7],
        "能效比": column[8],
    }


def generate_display_results(step500):
    """Export one front-end data folder per test run listed in NV_RESULT_PATH.

    For every column of the results CSV this creates
    ``<environment>_<method>_<model>_<single/multi card>`` under the output
    root, writes ``display_results.json`` into it, and copies the run's
    ``.yaml`` settings file (as ``setting.yaml``) and ``trainer_log.jsonl``
    next to it.

    Args:
        step500: Passed through unchanged to get_detail_folder_path() and
            get_detail_yaml_path() to select which result folder is used.

    Raises:
        ValueError: If the CSV has fewer lines than a run record needs.
        FileNotFoundError: If no result folder or no ``.yaml`` file can be
            found for a run.
    """
    output_root = r"C:\Users\wengq\Desktop\QY项目\前端\测试数据"

    columns = _read_result_columns(NV_RESULT_PATH)
    print("columns")
    print(columns)
    print("********************")

    for column in columns:
        # Spreadsheet exports often leave trailing commas, which show up here
        # as columns made of empty strings only; they describe no run.
        if not any(value.strip() for value in column):
            continue
        if len(column) < _RECORD_FIELD_COUNT:
            raise ValueError(
                f"{NV_RESULT_PATH} has {len(column)} data lines; "
                f"at least {_RECORD_FIELD_COUNT} are required"
            )

        detail_dict = _build_detail_dict(column)
        print("detail_dict")
        print(detail_dict)
        print("********************")

        folder_name = "_".join([
            detail_dict["测试环境"],
            detail_dict["测试方法"],
            detail_dict["模型名称"],
            detail_dict["单卡多卡"],
        ])
        folder_path = os.path.join(output_root, folder_name)
        os.makedirs(folder_path, exist_ok=True)
        print("folder_path")
        print(folder_path)
        print("********************")

        file_path = os.path.join(folder_path, "display_results.json")
        with open(file_path, 'w', encoding='utf-8') as file:
            json.dump(detail_dict, file, indent=4, ensure_ascii=False)

        model_name = detail_dict["模型名称"]
        single = 1 if detail_dict["单卡多卡"] == "单卡" else 0
        detail_folder_path = get_detail_folder_path(model_name, "lora_sft", single, step500)
        detail_yaml_path = get_detail_yaml_path(model_name, "lora_sft", single, step500)
        # Both helpers return None when nothing matches; fail with a message
        # that names the run instead of a TypeError from shutil/os.path.
        if detail_folder_path is None:
            raise FileNotFoundError(
                f"no result folder found under {RESULTS_PATH} for {folder_name}"
            )
        if detail_yaml_path is None:
            raise FileNotFoundError(
                f"no .yaml file found in {detail_folder_path}"
            )

        shutil.copy2(detail_yaml_path, os.path.join(folder_path, "setting.yaml"))
        shutil.copy2(
            os.path.join(detail_folder_path, "trainer_log.jsonl"),
            os.path.join(folder_path, "trainer_log.jsonl"),
        )
# for column in columns:
# detail_dict = {}
# detail_dict["模型名称"] = column[1]
# detail_dict["测试环境"] = column[0]
# detail_dict["单卡多卡"] = column[2]
# detail_dict["测试方法"] = "推理"
# detail_dict["数据集"] = "alpaca_gpt4_zh"
# detail_dict["框架"] = "PyTorch"
# detail_dict["硬件配置"] = "详细硬件配置" #需要检查
# detail_dict["训练时长"] = column[9]
# detail_dict["单步时长"] = None
# detail_dict["吞吐量"] = column[10]
# detail_dict["单卡最大显存占用量"] = column[11]
# detail_dict["训练功耗"] = column[12]
# detail_dict["能效比"] = None
# print("detail_dict")
# print(detail_dict)
# print("********************")
# folder_name = detail_dict["测试环境"] + "_" + detail_dict["测试方法"] + "_" + detail_dict["模型名称"] + "_" + detail_dict["单卡多卡"]
# folder_path = os.path.join(r"C:\Users\wengq\Desktop\QY项目\前端\测试数据", folder_name)
# os.makedirs(folder_path, exist_ok=True)
# print("folder_path")
# print(folder_path)
# print("********************")
# file_path = os.path.join(folder_path, "display_results.json")
# with open(file_path, 'w', encoding='utf-8') as file:
# json.dump(detail_dict, file, indent=4, ensure_ascii=False)
# detail_yaml_path = get_detail_yaml_path(detail_dict["模型名称"], MODEL_FOLDER_PREFIX[detail_dict["模型名称"]], "inference", 1, 1 if detail_dict["单卡多卡"] == "单卡" else 0, step500)
# print(detail_yaml_path)
# print(os.path.join(folder_path, "setting.yaml"))
# shutil.copy2(detail_yaml_path, os.path.join(folder_path, "setting.yaml"))
def get_detail_folder_path(model_folder, train_type, single, step500):
    """Return the result directory of one run, or ``None`` if there is none.

    Searches the immediate children of ``RESULTS_PATH`` for a directory whose
    name starts with
    ``<train_type>_<model_folder>_<1|8>_gpu_<500|1000>_step``.

    Args:
        model_folder: Model name as it appears in the directory name.
        train_type: Leading part of the directory name, e.g. ``"lora_sft"``.
        single: Truthy selects ``_1_gpu``, falsy selects ``_8_gpu``.
        step500: Truthy selects ``_500_step``, falsy selects ``_1000_step``.

    Returns:
        The full path of the first matching directory in sorted name order,
        or ``None`` when no directory matches.
    """
    gpu_tag = "_1_gpu" if single else "_8_gpu"
    step_tag = "_500_step" if step500 else "_1000_step"
    detail_folder_prefix = train_type + "_" + model_folder + gpu_tag + step_tag
    print(detail_folder_prefix)

    # os.listdir() returns names in arbitrary order; sorting makes the choice
    # reproducible when several directories share the prefix.
    for entry in sorted(os.listdir(RESULTS_PATH)):
        if not entry.startswith(detail_folder_prefix):
            continue
        full_path = os.path.join(RESULTS_PATH, entry)
        if os.path.isdir(full_path):
            return full_path
    return None
def get_detail_yaml_path(model_folder, train_type, single, step500):
    """Return the ``.yaml`` file inside a run's result directory, or ``None``.

    The directory is located with get_detail_folder_path() using the same
    four arguments.

    Returns:
        The full path of the first regular file (in sorted name order) whose
        name ends with ``.yaml``; ``None`` when the directory is not found or
        contains no such file.
    """
    detail_folder_path = get_detail_folder_path(model_folder, train_type, single, step500)
    print(detail_folder_path)
    if detail_folder_path is None:
        return None

    # os.listdir() returns names in arbitrary order; sorting makes the choice
    # reproducible when the directory holds more than one .yaml file.
    for entry in sorted(os.listdir(detail_folder_path)):
        if not entry.endswith('.yaml'):
            continue
        full_path = os.path.join(detail_folder_path, entry)
        if os.path.isfile(full_path):
            return full_path
    return None
def main():
    """Script entry point: run the export with ``step500`` switched off."""
    generate_display_results(step500=False)
# Run the export only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()