From 0329f2e2857700069f07a71f0356017df229669c Mon Sep 17 00:00:00 2001
From: wql
Date: Fri, 29 Nov 2024 14:22:39 +0800
Subject: [PATCH] feat: add generate frontend data script

---
 generate_frontend_data_npu.py | 203 ++++++++++++++++++++++++++++++++++
 sort_result.py                |  38 +++++-----
 2 files changed, 224 insertions(+), 17 deletions(-)
 create mode 100644 generate_frontend_data_npu.py

diff --git a/generate_frontend_data_npu.py b/generate_frontend_data_npu.py
new file mode 100644
index 00000000..c614d5cc
--- /dev/null
+++ b/generate_frontend_data_npu.py
@@ -0,0 +1,203 @@
+import csv
+import os
+import shutil
+import json
+
+NV_RESULT_PATH = r"C:\Users\wengq\Desktop\QY项目\前端\910_result.csv"
+
+RESULTS_PATH = r"C:\Users\wengq\Desktop\QY项目\910b_results\final"
+
+# Root folder that receives one sub-folder per exported configuration.
+OUTPUT_PATH = r"C:\Users\wengq\Desktop\QY项目\前端\测试数据"
+
+MODEL_FOLDER = ["9g-8B", "Qwen-7B", "Llama2-7B", "ChatGLM2-6B", "Baichuan2-7B"]
+MODEL_PREFIX = ["Qwen", "llama2", "ChatGLM2", "Baichuan2"]
+MODEL_FOLDER_PREFIX = {"Qwen-7B": "Qwen", "Llama2-7B": "llama2", "ChatGLM2-6B": "ChatGLM2", "Baichuan2-7B": "Baichuan2"}
+
+# Hardware description stored verbatim in every display_results.json.
+HARDWARE_CONFIG = "CPU:4*鲲鹏920处理器(48核2.6GHz) \n内存:2TB(32 * 64GB)3200MHz DDR4内存 \n系统盘:2 * 480GB SATA SSD,3 * 1.92TB NVMe SSD \n阵列卡:9560-8i/8口/4G/12Gb(带电池保护模块) \n网络1:8*200GE-QSFP56 (含200GBase-SR4光模块-QSFP56-200G-多模模块) \n网络2: 平台集成4个千兆电口 \n昇腾训练模组:昇腾训练模组*8,每一路AI处理器提供7条HCCS互连链路,提供最大392GB/s带宽能力。单颗芯片显存容量为64GB; \n电源:服务器白金2600W 2.0版本交流电源*4 \n4U静态滑轨套件"
+
+# Number of CSV rows the LoRA export reads (column[0] .. column[8]).
+LORA_ROW_COUNT = 9
+
+
+def read_result_columns(csv_path):
+    """Read the summary CSV and return it transposed.
+
+    Args:
+        csv_path: Path of the CSV file to read.
+
+    Returns:
+        One list per CSV column; entry ``j`` of a column is the cell taken
+        from the ``j``-th non-empty row of the file.
+    """
+    # utf-8-sig drops a leading BOM when present; the csv module takes care
+    # of line endings and quoted cells.
+    with open(csv_path, 'r', encoding='utf-8-sig', newline='') as csv_file:
+        rows = [row for row in csv.reader(csv_file) if row]
+    # zip stops at the shortest row, so a ragged file cannot misalign cells.
+    return [list(column) for column in zip(*rows)]
+
+
+def generate_display_results(step500):
+    """Export one front-end data folder per column of NV_RESULT_PATH.
+
+    Each folder is created under OUTPUT_PATH and receives
+    display_results.json plus the setting.yaml and trainer_log.jsonl taken
+    from the matching run folder in RESULTS_PATH.
+
+    Args:
+        step500: True selects the 500-step runs, False the 1000-step runs.
+
+    Raises:
+        ValueError: If the CSV has fewer rows than the export reads.
+        FileNotFoundError: If no run folder or .yaml file matches a column.
+    """
+    columns = read_result_columns(NV_RESULT_PATH)
+    print("columns")
+    print(columns)
+    print("********************")
+
+    for column in columns:
+        if len(column) < LORA_ROW_COUNT:
+            raise ValueError("{} must have at least {} rows, found {}".format(NV_RESULT_PATH, LORA_ROW_COUNT, len(column)))
+
+        detail_dict = {
+            "模型名称": column[1],
+            "测试环境": column[0],
+            "单卡多卡": column[2],
+            "测试方法": "Lora微调",
+            "数据集": "belle_1m",
+            "框架": "PyTorch",
+            "硬件配置": HARDWARE_CONFIG,
+            "训练时长": column[3],
+            "单步时长": column[4],
+            "吞吐量": column[5],
+            "单卡最大显存占用量": column[6],
+            "训练功耗": column[7],
+            "能效比": column[8],
+        }
+        print("detail_dict")
+        print(detail_dict)
+        print("********************")
+
+        # Locate the inputs before writing anything, so a missing run fails
+        # with a clear error instead of a TypeError inside shutil/os.path.
+        single = detail_dict["单卡多卡"] == "单卡"
+        detail_folder_path = get_detail_folder_path(detail_dict["模型名称"], "lora_sft", single, step500)
+        detail_yaml_path = get_detail_yaml_path(detail_dict["模型名称"], "lora_sft", single, step500)
+        if detail_folder_path is None or detail_yaml_path is None:
+            raise FileNotFoundError("no run folder with a .yaml file under {} for {} ({})".format(RESULTS_PATH, detail_dict["模型名称"], detail_dict["单卡多卡"]))
+
+        folder_name = detail_dict["测试环境"] + "_" + detail_dict["测试方法"] + "_" + detail_dict["模型名称"] + "_" + detail_dict["单卡多卡"]
+        folder_path = os.path.join(OUTPUT_PATH, folder_name)
+        os.makedirs(folder_path, exist_ok=True)
+
+        print("folder_path")
+        print(folder_path)
+        print("********************")
+
+        file_path = os.path.join(folder_path, "display_results.json")
+        with open(file_path, 'w', encoding='utf-8') as json_file:
+            json.dump(detail_dict, json_file, indent=4, ensure_ascii=False)
+
+        shutil.copy2(detail_yaml_path, os.path.join(folder_path, "setting.yaml"))
+        shutil.copy2(os.path.join(detail_folder_path, "trainer_log.jsonl"), os.path.join(folder_path, "trainer_log.jsonl"))
+
+    # NOTE: disabled inference export, kept for reference. It reads CSV rows
+    # 9-12 and calls get_detail_yaml_path with arguments the current
+    # definition does not accept.
+    # for column in columns:
+    #     detail_dict = {}
+    #     detail_dict["模型名称"] = column[1]
+    #     detail_dict["测试环境"] = column[0]
+    #     detail_dict["单卡多卡"] = column[2]
+    #     detail_dict["测试方法"] = "推理"
+    #     detail_dict["数据集"] = "alpaca_gpt4_zh"
+    #     detail_dict["框架"] = "PyTorch"
+    #     detail_dict["硬件配置"] = "详细硬件配置" # needs checking
+
+    #     detail_dict["训练时长"] = column[9]
+    #     detail_dict["单步时长"] = None
+    #     detail_dict["吞吐量"] = column[10]
+    #     detail_dict["单卡最大显存占用量"] = column[11]
+    #     detail_dict["训练功耗"] = column[12]
+    #     detail_dict["能效比"] = None
+    #     print("detail_dict")
+    #     print(detail_dict)
+    #     print("********************")
+
+    #     folder_name = detail_dict["测试环境"] + "_" + detail_dict["测试方法"] + "_" + detail_dict["模型名称"] + "_" + detail_dict["单卡多卡"]
+    #     folder_path = os.path.join(r"C:\Users\wengq\Desktop\QY项目\前端\测试数据", folder_name)
+    #     os.makedirs(folder_path, exist_ok=True)
+
+    #     print("folder_path")
+    #     print(folder_path)
+    #     print("********************")
+
+    #     file_path = os.path.join(folder_path, "display_results.json")
+    #     with open(file_path, 'w', encoding='utf-8') as file:
+    #         json.dump(detail_dict, file, indent=4, ensure_ascii=False)
+
+    #     detail_yaml_path = get_detail_yaml_path(detail_dict["模型名称"], MODEL_FOLDER_PREFIX[detail_dict["模型名称"]], "inference", 1, 1 if detail_dict["单卡多卡"] == "单卡" else 0, step500)
+    #     print(detail_yaml_path)
+    #     print(os.path.join(folder_path, "setting.yaml"))
+    #     shutil.copy2(detail_yaml_path, os.path.join(folder_path, "setting.yaml"))
+
+
+def get_detail_folder_path(model_folder, train_type, single, step500):
+    """Return the run folder in RESULTS_PATH that matches the given settings.
+
+    Args:
+        model_folder: Model name as used in the run folder names.
+        train_type: Run type prefix of the folder name, e.g. "lora_sft".
+        single: Truthy for the 1-GPU run, falsy for the 8-GPU run.
+        step500: Truthy for the 500-step run, falsy for the 1000-step run.
+
+    Returns:
+        Full path of the first matching directory in name order, or None
+        when no directory name starts with the expected prefix.
+    """
+    detail_folder_prefix = train_type + "_" + model_folder + ("_1_gpu" if single else "_8_gpu") + ("_500_step" if step500 else "_1000_step")
+    print(detail_folder_prefix)
+    # os.listdir order is arbitrary; sorting keeps the choice reproducible
+    # when several runs share the prefix.
+    for entry in sorted(os.listdir(RESULTS_PATH)):
+        full_path = os.path.join(RESULTS_PATH, entry)
+        if os.path.isdir(full_path) and entry.startswith(detail_folder_prefix):
+            return full_path
+    return None
+
+
+def get_detail_yaml_path(model_folder, train_type, single, step500):
+    """Return the first .yaml file inside the matching run folder.
+
+    Args:
+        model_folder: Model name as used in the run folder names.
+        train_type: Run type prefix of the folder name, e.g. "lora_sft".
+        single: Truthy for the 1-GPU run, falsy for the 8-GPU run.
+        step500: Truthy for the 500-step run, falsy for the 1000-step run.
+
+    Returns:
+        Full path of the first .yaml file in name order, or None when the
+        run folder is missing or contains no .yaml file.
+    """
+    detail_folder_path = get_detail_folder_path(model_folder, train_type, single, step500)
+    print(detail_folder_path)
+    if detail_folder_path is None:
+        return None
+
+    for entry in sorted(os.listdir(detail_folder_path)):
+        full_path = os.path.join(detail_folder_path, entry)
+        if os.path.isfile(full_path) and entry.endswith('.yaml'):
+            return full_path
+    return None
+
+
+def main():
+    """Export the front-end data for the 1000-step runs."""
+    generate_display_results(False)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/sort_result.py b/sort_result.py index 1cbc44e0..20ef11a1 100644 --- a/sort_result.py +++ b/sort_result.py @@ -2,16 +2,23 @@ import os import time import json -AVG_ONLY = False -TRAIN_TYPE = "inference" 
+AVG_ONLY = True +#AVG_ONLY = False + +TRAIN_TYPE = "lora_sft" +#TRAIN_TYPE = "inference" + +STEP500 = True +#STEP500 = False + RESULTS_PATH = r"C:\Users\wengq\Desktop\QY项目\910b_results\final" if TRAIN_TYPE == "lora_sft": - MODEL_PREFIX = ["Qwen-7B", "Llama2-7B", "ChatGLM2-6B", "Baichuan2-7B"] + MODEL_PREFIX = ["9g-8B", "Qwen-7B", "Llama2-7B", "ChatGLM2-6B", "Baichuan2-7B"] else: MODEL_PREFIX = ["Qwen-7B", "Llama2-7B", "Baichuan2-7B"] -def get_train_result(step500): +def get_train_result(): all_total_sec = [] all_step_sec = [] all_token_per_sec = [] @@ -26,7 +33,7 @@ def get_train_result(step500): train_cnt = 0 gpu_cnt = 1 if single else 8 - step = 500 if step500 else 1000 + step = 500 if STEP500 else 1000 folder_prefix = TRAIN_TYPE + "_" + model_prefix + "_" + str(gpu_cnt) + "_gpu_" + str(step) file_prefix = "train" if TRAIN_TYPE == "lora_sft" else "predict" for dir_name in os.listdir(RESULTS_PATH): @@ -38,7 +45,7 @@ def get_train_result(step500): # print(train_results) total_sec.append(train_results[file_prefix + '_runtime']) if TRAIN_TYPE == "lora_sft": - step_sec.append(train_results['train_runtime'] / (500 if step500 else 1000)) + step_sec.append(train_results['train_runtime'] / (500 if STEP500 else 1000)) token_per_sec.append(train_results['train_tokens_per_second']) if AVG_ONLY: @@ -60,12 +67,12 @@ def get_train_result(step500): print(result) return result -def get_detail_folder_path(model_folder, model_prefix, run_no, single, step500): - detail_folder = model_prefix + "_" + TRAIN_TYPE + "_" + str(run_no) + ("_single" if single else "") + ("_step500" if step500 else "") +def get_detail_folder_path(model_folder, model_prefix, run_no, single): + detail_folder = model_prefix + "_" + TRAIN_TYPE + "_" + str(run_no) + ("_single" if single else "") + ("_step500" if STEP500 else "") return os.path.join(RESULTS_PATH, TRAIN_TYPE, model_folder, detail_folder) -def get_train_start_end_time(model_folder, model_prefix, run_no, single, step500): - trainer_log_path = 
os.path.join(get_detail_folder_path(model_folder, model_prefix, run_no, single, step500), "trainer_log.jsonl") +def get_train_start_end_time(model_folder, model_prefix, run_no, single): + trainer_log_path = os.path.join(get_detail_folder_path(model_folder, model_prefix, run_no, single), "trainer_log.jsonl") start_time = time.strptime(get_first_json(trainer_log_path)['cur_time'], "%Y-%m-%d %H:%M:%S") end_time = time.strptime(get_last_json(trainer_log_path)['cur_time'], "%Y-%m-%d %H:%M:%S") @@ -73,7 +80,7 @@ def get_train_start_end_time(model_folder, model_prefix, run_no, single, step500 return start_time, end_time -def get_gpu_result(step500): +def get_gpu_result(): all_mem = [] all_power_consumption = [] @@ -84,7 +91,7 @@ def get_gpu_result(step500): train_cnt = 0 gpu_cnt = 1 if single else 8 - step = 500 if step500 else 1000 + step = 500 if STEP500 else 1000 folder_prefix = TRAIN_TYPE + "_" + model_prefix + "_" + str(gpu_cnt) + "_gpu_" + str(step) for dir_name in os.listdir(RESULTS_PATH): @@ -158,11 +165,8 @@ def get_last_json(jsonl_file_path): def main(): - get_train_result(False) - get_gpu_result(False) - - # get_train_result(True) - # get_gpu_result(True) + get_train_result() + get_gpu_result() if __name__ == "__main__":