# LLaMA-Factory-310P3/generate_frontend_data_npu.py

import os
import shutil
import json

# Comma-separated result file that generate_display_results() parses.
NV_RESULT_PATH = r"C:\Users\wengq\Desktop\QY项目\前端\910_result.csv"

# Directory whose sub-folders are searched for the per-run results.
RESULTS_PATH = r"C:\Users\wengq\Desktop\QY项目\910b_results\final"

# Model folder names.
MODEL_FOLDER = [
    "9g-8B",
    "Qwen-7B",
    "Llama2-7B",
    "ChatGLM2-6B",
    "Baichuan2-7B",
]

# Model name prefixes.
# NOTE(review): there is no entry for "9g-8B" here or in the mapping below --
# confirm whether that is intentional.
MODEL_PREFIX = [
    "Qwen",
    "llama2",
    "ChatGLM2",
    "Baichuan2",
]

# Model folder name -> model name prefix.
MODEL_FOLDER_PREFIX = {
    "Qwen-7B": "Qwen",
    "Llama2-7B": "llama2",
    "ChatGLM2-6B": "ChatGLM2",
    "Baichuan2-7B": "Baichuan2",
}

def _read_result_columns(csv_path):
    """Read the comma-separated file at *csv_path* and return it column-wise.

    Element ``i`` of the returned list holds the ``i``-th comma-separated
    value of every non-empty line, in file order. Each parsed line is printed.
    """
    columns = []
    # "utf-8-sig" removes a leading byte-order mark only when one is present,
    # so the first cell is never truncated when the file has no BOM.
    with open(csv_path, 'r', encoding='utf-8-sig') as file:
        for row in file:
            # Strip just the line terminator; the last line may not have one.
            row = row.rstrip("\r\n")
            if not row:
                continue
            values = row.split(',')
            print(values)

            # Grow on demand instead of assuming a fixed number of columns.
            while len(columns) < len(values):
                columns.append([])
            for i, value in enumerate(values):
                columns[i].append(value)
    return columns


def _build_detail_dict(column):
    """Map the first nine values of one result column to the display fields."""
    return {
        "模型名称": column[1],
        "测试环境": column[0],
        "单卡多卡": column[2],
        "测试方法": "Lora微调",
        "数据集": "belle_1m",
        "框架": "PyTorch",
        "硬件配置": "CPU:4*鲲鹏920处理器（48核2.6GHz） \n内存：2TB（32 * 64GB）3200MHz DDR4内存 \n系统盘：2 * 480GB SATA SSD，3 * 1.92TB NVMe SSD \n阵列卡：9560-8i/8口/4G/12Gb（带电池保护模块） \n网络1：8*200GE-QSFP56 （含200GBase-SR4光模块-QSFP56-200G-多模模块） \n网络2: 平台集成4个千兆电口 \n昇腾训练模组：昇腾训练模组*8，每一路AI处理器提供7条HCCS互连链路，提供最大392GB/s带宽能力。单颗芯片显存容量为64GB； \n电源：服务器白金2600W 2.0版本交流电源*4 \n4U静态滑轨套件",
        "训练时长": column[3],
        "单步时长": column[4],
        "吞吐量": column[5],
        "单卡最大显存占用量": column[6],
        "训练功耗": column[7],
        "能效比": column[8],
    }


def generate_display_results(step500):
    """Export one front-end data folder per column of the result CSV.

    The file at ``NV_RESULT_PATH`` is read column-wise; every column is one
    record. For each record a folder is created under the front-end test-data
    directory and filled with:

    * ``display_results.json`` -- the display fields of the record,
    * ``setting.yaml`` -- a copy of the yaml file found in the run folder,
    * ``trainer_log.jsonl`` -- a copy of the run's trainer log.

    Args:
        step500: Forwarded to ``get_detail_folder_path`` and
            ``get_detail_yaml_path`` to select which run folder is used.

    Raises:
        FileNotFoundError: If no run folder or no yaml file is found for a
            record (previously this surfaced as an opaque ``TypeError``).
        IndexError: If a column holds fewer than nine values.
    """
    columns = _read_result_columns(NV_RESULT_PATH)
    print("columns")
    print(columns)
    print("********************")

    output_root = r"C:\Users\wengq\Desktop\QY项目\前端\测试数据"

    for column in columns:
        detail_dict = _build_detail_dict(column)
        print("detail_dict")
        print(detail_dict)
        print("********************")

        model_name = detail_dict["模型名称"]
        folder_name = "_".join([
            detail_dict["测试环境"],
            detail_dict["测试方法"],
            model_name,
            detail_dict["单卡多卡"],
        ])
        folder_path = os.path.join(output_root, folder_name)
        os.makedirs(folder_path, exist_ok=True)

        print("folder_path")
        print(folder_path)
        print("********************")

        file_path = os.path.join(folder_path, "display_results.json")
        with open(file_path, 'w', encoding='utf-8') as file:
            json.dump(detail_dict, file, indent=4, ensure_ascii=False)

        single = 1 if detail_dict["单卡多卡"] == "单卡" else 0
        detail_folder_path = get_detail_folder_path(model_name, "lora_sft", single, step500)
        if detail_folder_path is None:
            # Fail with a readable message instead of passing None to os.path.join.
            raise FileNotFoundError(
                "no result folder found for record {!r}".format(folder_name)
            )
        detail_yaml_path = get_detail_yaml_path(model_name, "lora_sft", single, step500)
        if detail_yaml_path is None:
            raise FileNotFoundError(
                "no .yaml file found in {!r}".format(detail_folder_path)
            )

        shutil.copy2(detail_yaml_path, os.path.join(folder_path, "setting.yaml"))
        shutil.copy2(
            os.path.join(detail_folder_path, "trainer_log.jsonl"),
            os.path.join(folder_path, "trainer_log.jsonl"),
        )

    # for column in columns:
    #     detail_dict = {}
    #     detail_dict["模型名称"] = column[1]
    #     detail_dict["测试环境"] = column[0]
    #     detail_dict["单卡多卡"] = column[2]
    #     detail_dict["测试方法"] = "推理"
    #     detail_dict["数据集"] = "alpaca_gpt4_zh"
    #     detail_dict["框架"] = "PyTorch"
    #     detail_dict["硬件配置"] = "详细硬件配置" #需要检查

    #     detail_dict["训练时长"] = column[9]
    #     detail_dict["单步时长"] = None
    #     detail_dict["吞吐量"] = column[10]
    #     detail_dict["单卡最大显存占用量"] = column[11]
    #     detail_dict["训练功耗"] = column[12]
    #     detail_dict["能效比"] = None
    #     print("detail_dict")
    #     print(detail_dict)
    #     print("********************")

    #     folder_name = detail_dict["测试环境"] + "_" + detail_dict["测试方法"] + "_" + detail_dict["模型名称"] + "_" + detail_dict["单卡多卡"]
    #     folder_path = os.path.join(r"C:\Users\wengq\Desktop\QY项目\前端\测试数据", folder_name)
    #     os.makedirs(folder_path, exist_ok=True)

    #     print("folder_path")
    #     print(folder_path)
    #     print("********************")

    #     file_path = os.path.join(folder_path, "display_results.json")
    #     with open(file_path, 'w', encoding='utf-8') as file:
    #         json.dump(detail_dict, file, indent=4, ensure_ascii=False)

    #     detail_yaml_path = get_detail_yaml_path(detail_dict["模型名称"], MODEL_FOLDER_PREFIX[detail_dict["模型名称"]], "inference", 1, 1 if detail_dict["单卡多卡"] == "单卡" else 0, step500)
    #     print(detail_yaml_path)
    #     print(os.path.join(folder_path, "setting.yaml"))
    #     shutil.copy2(detail_yaml_path, os.path.join(folder_path, "setting.yaml"))

def get_detail_folder_path(model_folder, train_type, single, step500):
    """Find the result directory of one run directly below ``RESULTS_PATH``.

    The directory name must start with
    ``<train_type>_<model_folder>_<1|8>_gpu_<500|1000>_step``, where the GPU
    part is ``1`` when *single* is truthy and the step part is ``500`` when
    *step500* is truthy. The prefix is printed before searching.

    Returns:
        The full path of the first matching directory in ``os.listdir`` order,
        or ``None`` when nothing matches.
    """
    gpu_tag = "_1_gpu" if single else "_8_gpu"
    step_tag = "_500_step" if step500 else "_1000_step"
    wanted_prefix = "".join((train_type, "_", model_folder, gpu_tag, step_tag))
    print(wanted_prefix)

    candidates = (
        os.path.join(RESULTS_PATH, name)
        for name in os.listdir(RESULTS_PATH)
        if name.startswith(wanted_prefix)
    )
    return next((path for path in candidates if os.path.isdir(path)), None)


def get_detail_yaml_path(model_folder, train_type, single, step500):
    """Return the path of a ``.yaml`` file stored in a run's result directory.

    The directory is looked up with ``get_detail_folder_path`` using the same
    arguments, and the lookup result is printed.

    Returns:
        The full path of the first regular file ending in ``.yaml`` in
        ``os.listdir`` order, or ``None`` when the directory or such a file
        does not exist.
    """
    run_dir = get_detail_folder_path(model_folder, train_type, single, step500)
    print(run_dir)

    if run_dir is not None:
        for name in os.listdir(run_dir):
            candidate = os.path.join(run_dir, name)
            if name.endswith('.yaml') and os.path.isfile(candidate):
                return candidate
    return None

def main():
    """Script entry point: run the export with ``step500`` set to ``False``."""
    generate_display_results(step500=False)


# Run the export only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()