This commit is contained in:
wql 2024-10-15 14:08:56 +08:00
commit 0c6ec554ba
33 changed files with 274 additions and 479 deletions

View File

@ -1 +1,46 @@
bash run_once.sh lora_sft Qwen-7B 4 50
#!/bin/bash
# Runs the benchmark matrix below repeat_cnt times in a row.
#
# Usage: bash batch_run.sh <repeat_cnt>
#   e.g. bash batch_run.sh 3
#
# Every enabled line invokes run_once.sh with four positional arguments
# (presumably: task, model name, device count, step/sample count -- confirm
# against run_once.sh). Commented-out lines are alternative configurations
# kept so they can be toggled quickly.
repeat_cnt=$1

# An empty or non-numeric count is evaluated as 0 by the arithmetic loop
# below, which would make the script silently do nothing; reject it up front.
# Leading zeros are refused as well because bash would parse them as octal.
if [[ ! "${repeat_cnt}" =~ ^(0|[1-9][0-9]*)$ ]]; then
  printf 'Usage: bash %s <repeat_cnt>   (non-negative integer)\n' "${0##*/}" >&2
  exit 2
fi

# Load the Ascend toolkit environment before launching any run.
source /usr/local/Ascend/ascend-toolkit/set_env.sh

for ((i = 0; i < repeat_cnt; i++)); do
  # bash run_once.sh lora_sft Qwen-7B 8 500
  # bash run_once.sh lora_sft Qwen-7B 8 1000
  # bash run_once.sh lora_sft Qwen-7B 1 500
  # bash run_once.sh lora_sft Qwen-7B 1 1000
  bash run_once.sh inference Qwen-7B 8 500
  bash run_once.sh inference Qwen-7B 8 1000
  bash run_once.sh inference Qwen-7B 1 500
  bash run_once.sh inference Qwen-7B 1 1000
  # bash run_once.sh lora_sft Baichuan2-7B 8 500
  # bash run_once.sh lora_sft Baichuan2-7B 8 1000
  # bash run_once.sh lora_sft Baichuan2-7B 1 500
  # bash run_once.sh lora_sft Baichuan2-7B 1 1000
  bash run_once.sh inference Baichuan2-7B 8 500
  bash run_once.sh inference Baichuan2-7B 8 1000
  bash run_once.sh inference Baichuan2-7B 1 500
  bash run_once.sh inference Baichuan2-7B 1 1000
  # bash run_once.sh lora_sft ChatGLM2-6B 8 500
  # bash run_once.sh lora_sft ChatGLM2-6B 8 1000
  # bash run_once.sh lora_sft ChatGLM2-6B 1 500
  # bash run_once.sh lora_sft ChatGLM2-6B 1 1000
  # bash run_once.sh inference ChatGLM2-6B 8 500
  # bash run_once.sh inference ChatGLM2-6B 8 1000
  # bash run_once.sh inference ChatGLM2-6B 1 500
  # bash run_once.sh inference ChatGLM2-6B 1 1000
  # bash run_once.sh lora_sft Llama2-7B 8 500
  # bash run_once.sh lora_sft Llama2-7B 8 1000
  # bash run_once.sh lora_sft Llama2-7B 1 500
  # bash run_once.sh lora_sft Llama2-7B 1 1000
  # bash run_once.sh inference Llama2-7B 8 500
  # bash run_once.sh inference Llama2-7B 8 1000
  # bash run_once.sh inference Llama2-7B 1 500
  # bash run_once.sh inference Llama2-7B 1 1000
done

View File

@ -4,24 +4,19 @@ output_dir=$1
# Second and third positional arguments of the script.
sleep_time=$2
print_to_screen=$3
# Timestamped JSON output path under output_dir. NOTE: this value is replaced
# by the "npu_status_" assignment further down, so only that later path is
# effective.
output_file_path="${output_dir}/gpu_status_$(date +"%Y%m%d%H%M%S").json"
# Number of `npu-smi info` output lines mentioning '310P3'. NOTE: overwritten
# below by the '910B1' count.
device_cnt=$(npu-smi info | grep '310P3' | wc -l)
# Hard-coded id tables indexed by device position; they are read by the
# sampling loop later in this script.
chip_id_arr_from_info=(0 1 0 1 0 1 0 1)
npu_id_arr_from_info=(2 2 3 3 5 5 6 6)
npu_id_arr=(2 3 5 6)
# Effective output path: <output_dir>/npu_status_<YYYYmmddHHMMSS>.json
output_file_path="${output_dir}/npu_status_$(date +"%Y%m%d%H%M%S").json"
# Effective device count: number of `npu-smi info` output lines that contain
# the string '910B1'.
device_cnt=$(npu-smi info | grep '910B1' | wc -l)
#######################################
# Prints the real-time power reading of one NPU as reported by npu-smi.
# Arguments: $1 - NPU id handed to `npu-smi info -t power -i`
# Outputs:   the fifth whitespace-separated field of every output line that
#            contains "NPU Real-time Power(W)"; an empty line when no such
#            line exists (callers embedding the value must cope with that).
#######################################
get_power_dissipation() {
  local npu_id="$1"
  local power_dissipation
  # Only the "NPU Real-time Power(W)" line is parsed. An earlier extra query
  # for "Power Dissipation(W)" had its result overwritten before use, so it
  # was dropped to avoid a pointless npu-smi invocation per sample.
  power_dissipation=$(npu-smi info -t power -i "${npu_id}" \
    | grep "NPU Real-time Power(W)" \
    | awk '{print $5}')
  echo "${power_dissipation}"
}
#######################################
# Prints the HBM usage rate of one NPU as reported by npu-smi.
# Arguments: $1 - NPU id handed to `npu-smi info -t usages -i`
#            (any further arguments are ignored)
# Outputs:   the fifth whitespace-separated field of every output line that
#            contains "HBM Usage Rate(%)"; an empty line when no such line
#            exists (callers embedding the value must cope with that).
#######################################
get_mem_usage() {
  local npu_id="$1"
  local mem_usage
  # Only the per-NPU "usages" report is parsed. An earlier table-scraping
  # query of plain `npu-smi info` had its result overwritten before use, so
  # it and its chip/device locals were removed.
  mem_usage=$(npu-smi info -t usages -i "${npu_id}" \
    | grep "HBM Usage Rate(%)" \
    | awk '{print $5}')
  echo "${mem_usage}"
}
@ -32,15 +27,18 @@ for i in {1..1500}
do
device_mem_usage=""
for ((i=0; i<device_cnt; i++)); do
mem_usage=$(get_mem_usage ${chip_id_arr_from_info[i]} ${i})
device_mem_usage="${device_mem_usage}{\"npu_id\": ${npu_id_arr_from_info[i]}, \"chip_id\": ${chip_id_arr_from_info[i]}, \"device_id\": ${i}, \"mem_usage\": ${mem_usage}}, "
mem_usage=$(get_mem_usage ${i})
# echo "${mem_usage}"
device_mem_usage="${device_mem_usage}{\"npu_id\": ${i}, \"mem_usage_percent\": ${mem_usage}}, "
# echo "${device_mem_usage}"
done
device_mem_usage="${device_mem_usage%??}"
# echo "${device_mem_usage}"
npu_power_dissipation=""
for npu_id in "${npu_id_arr[@]}"; do
power_dissipation=$(get_power_dissipation ${npu_id})
npu_power_dissipation="${npu_power_dissipation}{\"npu_id\": ${npu_id}, \"power_dissipation\": ${power_dissipation}}, "
for ((i=0; i<device_cnt; i++)); do
power_dissipation=$(get_power_dissipation ${i})
npu_power_dissipation="${npu_power_dissipation}{\"npu_id\": ${i}, \"power_dissipation\": ${power_dissipation}}, "
done
npu_power_dissipation="${npu_power_dissipation%??}"

View File

@ -16,22 +16,22 @@ def main():
model_name_or_path = ""
template = ""
if model == "9g-8B":
model_name_or_path = "ms_cache/hub/qy/sft_8b_v2"
model_name_or_path = "/home/ma-user/models/sft_8b_v2"
template = "default"
elif model == "Baichuan2-7B":
model_name_or_path = "ms_cache/hub/baichuan-inc/Baichuan2-7B-Base"
model_name_or_path = "/home/ma-user/models/Baichuan2-7B-Base"
template = "baichuan2"
elif model == "ChatGLM2-6B":
model_name_or_path = "ms_cache/hub/zhipuai/chatglm2-6b"
model_name_or_path = "/home/ma-user/models/chatglm2-6b"
template = "chatglm2"
elif model == "Llama2-7B":
model_name_or_path = "ms_cache/hub/modelscope/llama-2-7b-ms"
model_name_or_path = "/home/ma-user/models/llama-2-7b-ms"
template = "llama2"
elif model == "Qwen-7B":
model_name_or_path = "ms_cache/hub/qwen/Qwen-7B"
model_name_or_path = "/home/ma-user/models/qwen"
template = "qwen"
else:
print("ERROR: model not supported.")
print("ERROR: model not supported or model name wrong")
sys.exit()
config = None

View File

@ -20,3 +20,5 @@ packaging
pyyaml
numpy<2.0.0
modelscope
oss2
addict

View File

@ -1 +0,0 @@
{"cur_time": "2024-09-13 03:19:21", "npu_power_dissipation": [{"npu_id": 2, "power_dissipation": }, {"npu_id": 3, "power_dissipation": }, {"npu_id": 5, "power_dissipation": }, {"npu_id": 6, "power_dissipation": }], "device_mem_usage": [{"npu_id": 2, "chip_id": 0, "device_id": 0, "mem_usage": 1692}, {"npu_id": 2, "chip_id": 1, "device_id": 1, "mem_usage": 1177}, {"npu_id": 3, "chip_id": 0, "device_id": 2, "mem_usage": 1819}, {"npu_id": 3, "chip_id": 1, "device_id": 3, "mem_usage": 1050}, {"npu_id": 5, "chip_id": 0, "device_id": 4, "mem_usage": 1486}, {"npu_id": 5, "chip_id": 1, "device_id": 5, "mem_usage": 1380}, {"npu_id": 6, "chip_id": 0, "device_id": 6, "mem_usage": 1340}, {"npu_id": 6, "chip_id": 1, "device_id": 7, "mem_usage": 1527}]}

View File

@ -1,14 +0,0 @@
cutoff_len: 1024
ddp_timeout: 180000000
do_predict: true
eval_dataset: alpaca_gpt4_zh
include_tokens_per_second: true
max_samples: 50
model_name_or_path: ms_cache/hub/baichuan-inc/Baichuan2-7B-Base
output_dir: ./results/inference_Baichuan2-7B_8_gpu_50_step_20240913031907
overwrite_cache: true
overwrite_output_dir: true
per_device_eval_batch_size: 2
predict_with_generate: true
preprocessing_num_workers: 16
template: baichuan2

View File

@ -1,2 +0,0 @@
09/13/2024 03:19:29 - INFO - llamafactory.hparams.parser - Process rank: 0, device: npu:0, n_gpu: 1, distributed training: False, compute dtype: None
09/13/2024 03:19:30 - INFO - llamafactory.data.loader - Loading dataset llamafactory/alpaca_gpt4_zh...

View File

@ -1 +0,0 @@
{"cur_time": "2024-09-13 03:21:17", "npu_power_dissipation": [{"npu_id": 2, "power_dissipation": }, {"npu_id": 3, "power_dissipation": }, {"npu_id": 5, "power_dissipation": }, {"npu_id": 6, "power_dissipation": }], "device_mem_usage": [{"npu_id": 2, "chip_id": 0, "device_id": 0, "mem_usage": 1692}, {"npu_id": 2, "chip_id": 1, "device_id": 1, "mem_usage": 1177}, {"npu_id": 3, "chip_id": 0, "device_id": 2, "mem_usage": 1819}, {"npu_id": 3, "chip_id": 1, "device_id": 3, "mem_usage": 1050}, {"npu_id": 5, "chip_id": 0, "device_id": 4, "mem_usage": 1486}, {"npu_id": 5, "chip_id": 1, "device_id": 5, "mem_usage": 1381}, {"npu_id": 6, "chip_id": 0, "device_id": 6, "mem_usage": 1340}, {"npu_id": 6, "chip_id": 1, "device_id": 7, "mem_usage": 1527}]}

View File

@ -1,14 +0,0 @@
cutoff_len: 1024
ddp_timeout: 180000000
do_predict: true
eval_dataset: alpaca_gpt4_zh
include_tokens_per_second: true
max_samples: 50
model_name_or_path: ms_cache/hub/baichuan-inc/Baichuan2-7B-Base
output_dir: ./results/inference_Baichuan2-7B_8_gpu_50_step_20240913032104
overwrite_cache: true
overwrite_output_dir: true
per_device_eval_batch_size: 2
predict_with_generate: true
preprocessing_num_workers: 16
template: baichuan2

View File

@ -1,2 +0,0 @@
09/13/2024 03:21:25 - INFO - llamafactory.hparams.parser - Process rank: 0, device: npu:0, n_gpu: 1, distributed training: False, compute dtype: None
09/13/2024 03:21:26 - INFO - llamafactory.data.loader - Loading dataset llamafactory/alpaca_gpt4_zh...

View File

@ -1 +0,0 @@
{"cur_time": "2024-09-13 03:22:45", "npu_power_dissipation": [{"npu_id": 2, "power_dissipation": }, {"npu_id": 3, "power_dissipation": }, {"npu_id": 5, "power_dissipation": }, {"npu_id": 6, "power_dissipation": }], "device_mem_usage": [{"npu_id": 2, "chip_id": 0, "device_id": 0, "mem_usage": 1692}, {"npu_id": 2, "chip_id": 1, "device_id": 1, "mem_usage": 1177}, {"npu_id": 3, "chip_id": 0, "device_id": 2, "mem_usage": 1819}, {"npu_id": 3, "chip_id": 1, "device_id": 3, "mem_usage": 1050}, {"npu_id": 5, "chip_id": 0, "device_id": 4, "mem_usage": 1486}, {"npu_id": 5, "chip_id": 1, "device_id": 5, "mem_usage": 1380}, {"npu_id": 6, "chip_id": 0, "device_id": 6, "mem_usage": 1340}, {"npu_id": 6, "chip_id": 1, "device_id": 7, "mem_usage": 1528}]}

View File

@ -1,14 +0,0 @@
cutoff_len: 1024
ddp_timeout: 180000000
do_predict: true
eval_dataset: alpaca_gpt4_zh
include_tokens_per_second: true
max_samples: 50
model_name_or_path: ms_cache/hub/baichuan-inc/Baichuan2-7B-Base
output_dir: ./results/inference_Baichuan2-7B_8_gpu_50_step_20240913032231
overwrite_cache: true
overwrite_output_dir: true
per_device_eval_batch_size: 2
predict_with_generate: true
preprocessing_num_workers: 16
template: baichuan2

View File

@ -1,2 +0,0 @@
09/13/2024 03:22:53 - INFO - llamafactory.hparams.parser - Process rank: 0, device: npu:0, n_gpu: 1, distributed training: False, compute dtype: None
09/13/2024 03:22:54 - INFO - llamafactory.data.loader - Loading dataset llamafactory/alpaca_gpt4_zh...

View File

@ -1,13 +0,0 @@
{"cur_time": "2024-09-13 05:36:56", "npu_power_dissipation": [{"npu_id": 2, "power_dissipation": }, {"npu_id": 3, "power_dissipation": }, {"npu_id": 5, "power_dissipation": }, {"npu_id": 6, "power_dissipation": }], "device_mem_usage": [{"npu_id": 2, "chip_id": 0, "device_id": 0, "mem_usage": 1692}, {"npu_id": 2, "chip_id": 1, "device_id": 1, "mem_usage": 1177}, {"npu_id": 3, "chip_id": 0, "device_id": 2, "mem_usage": 1819}, {"npu_id": 3, "chip_id": 1, "device_id": 3, "mem_usage": 1050}, {"npu_id": 5, "chip_id": 0, "device_id": 4, "mem_usage": 1485}, {"npu_id": 5, "chip_id": 1, "device_id": 5, "mem_usage": 1382}, {"npu_id": 6, "chip_id": 0, "device_id": 6, "mem_usage": 1341}, {"npu_id": 6, "chip_id": 1, "device_id": 7, "mem_usage": 1527}]}
{"cur_time": "2024-09-13 05:38:08", "npu_power_dissipation": [{"npu_id": 2, "power_dissipation": }, {"npu_id": 3, "power_dissipation": }, {"npu_id": 5, "power_dissipation": }, {"npu_id": 6, "power_dissipation": }], "device_mem_usage": [{"npu_id": 2, "chip_id": 0, "device_id": 0, "mem_usage": 7875
|}, {"npu_id": 2, "chip_id": 1, "device_id": 1, "mem_usage": 1176}, {"npu_id": 3, "chip_id": 0, "device_id": 2, "mem_usage": 1819}, {"npu_id": 3, "chip_id": 1, "device_id": 3, "mem_usage": 1050}, {"npu_id": 5, "chip_id": 0, "device_id": 4, "mem_usage": 1485}, {"npu_id": 5, "chip_id": 1, "device_id": 5, "mem_usage": 1382}, {"npu_id": 6, "chip_id": 0, "device_id": 6, "mem_usage": 1340}, {"npu_id": 6, "chip_id": 1, "device_id": 7, "mem_usage": 1527}]}
{"cur_time": "2024-09-13 05:39:21", "npu_power_dissipation": [{"npu_id": 2, "power_dissipation": }, {"npu_id": 3, "power_dissipation": }, {"npu_id": 5, "power_dissipation": }, {"npu_id": 6, "power_dissipation": }], "device_mem_usage": [{"npu_id": 2, "chip_id": 0, "device_id": 0, "mem_usage": 18803/
|}, {"npu_id": 2, "chip_id": 1, "device_id": 1, "mem_usage": 1175}, {"npu_id": 3, "chip_id": 0, "device_id": 2, "mem_usage": 1819}, {"npu_id": 3, "chip_id": 1, "device_id": 3, "mem_usage": 1050}, {"npu_id": 5, "chip_id": 0, "device_id": 4, "mem_usage": 1484}, {"npu_id": 5, "chip_id": 1, "device_id": 5, "mem_usage": 1382}, {"npu_id": 6, "chip_id": 0, "device_id": 6, "mem_usage": 1340}, {"npu_id": 6, "chip_id": 1, "device_id": 7, "mem_usage": 1527}]}
{"cur_time": "2024-09-13 05:40:34", "npu_power_dissipation": [{"npu_id": 2, "power_dissipation": }, {"npu_id": 3, "power_dissipation": }, {"npu_id": 5, "power_dissipation": }, {"npu_id": 6, "power_dissipation": }], "device_mem_usage": [{"npu_id": 2, "chip_id": 0, "device_id": 0, "mem_usage": 18803/
|}, {"npu_id": 2, "chip_id": 1, "device_id": 1, "mem_usage": 1176}, {"npu_id": 3, "chip_id": 0, "device_id": 2, "mem_usage": 1819}, {"npu_id": 3, "chip_id": 1, "device_id": 3, "mem_usage": 1050}, {"npu_id": 5, "chip_id": 0, "device_id": 4, "mem_usage": 1485}, {"npu_id": 5, "chip_id": 1, "device_id": 5, "mem_usage": 1382}, {"npu_id": 6, "chip_id": 0, "device_id": 6, "mem_usage": 1340}, {"npu_id": 6, "chip_id": 1, "device_id": 7, "mem_usage": 1527}]}
{"cur_time": "2024-09-13 05:41:46", "npu_power_dissipation": [{"npu_id": 2, "power_dissipation": }, {"npu_id": 3, "power_dissipation": }, {"npu_id": 5, "power_dissipation": }, {"npu_id": 6, "power_dissipation": }], "device_mem_usage": [{"npu_id": 2, "chip_id": 0, "device_id": 0, "mem_usage": 19787/
|}, {"npu_id": 2, "chip_id": 1, "device_id": 1, "mem_usage": 1175}, {"npu_id": 3, "chip_id": 0, "device_id": 2, "mem_usage": 1820}, {"npu_id": 3, "chip_id": 1, "device_id": 3, "mem_usage": 1049}, {"npu_id": 5, "chip_id": 0, "device_id": 4, "mem_usage": 1484}, {"npu_id": 5, "chip_id": 1, "device_id": 5, "mem_usage": 1382}, {"npu_id": 6, "chip_id": 0, "device_id": 6, "mem_usage": 1340}, {"npu_id": 6, "chip_id": 1, "device_id": 7, "mem_usage": 1527}]}
{"cur_time": "2024-09-13 05:42:59", "npu_power_dissipation": [{"npu_id": 2, "power_dissipation": }, {"npu_id": 3, "power_dissipation": }, {"npu_id": 5, "power_dissipation": }, {"npu_id": 6, "power_dissipation": }], "device_mem_usage": [{"npu_id": 2, "chip_id": 0, "device_id": 0, "mem_usage": 19787/
|}, {"npu_id": 2, "chip_id": 1, "device_id": 1, "mem_usage": 1175}, {"npu_id": 3, "chip_id": 0, "device_id": 2, "mem_usage": 1819}, {"npu_id": 3, "chip_id": 1, "device_id": 3, "mem_usage": 1050}, {"npu_id": 5, "chip_id": 0, "device_id": 4, "mem_usage": 1484}, {"npu_id": 5, "chip_id": 1, "device_id": 5, "mem_usage": 1382}, {"npu_id": 6, "chip_id": 0, "device_id": 6, "mem_usage": 1340}, {"npu_id": 6, "chip_id": 1, "device_id": 7, "mem_usage": 1527}]}
{"cur_time": "2024-09-13 05:44:12", "npu_power_dissipation": [{"npu_id": 2, "power_dissipation": }, {"npu_id": 3, "power_dissipation": }, {"npu_id": 5, "power_dissipation": }, {"npu_id": 6, "power_dissipation": }], "device_mem_usage": [{"npu_id": 2, "chip_id": 0, "device_id": 0, "mem_usage": 20771/
|}, {"npu_id": 2, "chip_id": 1, "device_id": 1, "mem_usage": 1176}, {"npu_id": 3, "chip_id": 0, "device_id": 2, "mem_usage": 1819}, {"npu_id": 3, "chip_id": 1, "device_id": 3, "mem_usage": 1050}, {"npu_id": 5, "chip_id": 0, "device_id": 4, "mem_usage": 1485}, {"npu_id": 5, "chip_id": 1, "device_id": 5, "mem_usage": 1381}, {"npu_id": 6, "chip_id": 0, "device_id": 6, "mem_usage": 1340}, {"npu_id": 6, "chip_id": 1, "device_id": 7, "mem_usage": 1527}]}

View File

@ -1,14 +0,0 @@
cutoff_len: 1024
ddp_timeout: 180000000
do_predict: true
eval_dataset: alpaca_gpt4_zh
include_tokens_per_second: true
max_samples: 50
model_name_or_path: ms_cache/hub/baichuan-inc/Baichuan2-7B-Base
output_dir: ./results/inference_Baichuan2-7B_8_gpu_50_step_20240913053642
overwrite_cache: true
overwrite_output_dir: true
per_device_eval_batch_size: 2
predict_with_generate: true
preprocessing_num_workers: 16
template: baichuan2

View File

@ -1,10 +0,0 @@
09/13/2024 05:37:03 - INFO - llamafactory.hparams.parser - Process rank: 0, device: npu:0, n_gpu: 1, distributed training: False, compute dtype: None
09/13/2024 05:37:04 - INFO - llamafactory.data.loader - Loading dataset llamafactory/alpaca_gpt4_zh...
eval example:
input_ids:
[195, 2848, 11450, 4755, 4434, 66, 196]
inputs:
<reserved_106>保持健康的三个提示。<reserved_107>
09/13/2024 05:37:24 - INFO - llamafactory.model.patcher - Using KV cache for faster generation.
09/13/2024 05:38:52 - INFO - llamafactory.model.model_utils.attention - Using vanilla attention implementation.
09/13/2024 05:38:52 - INFO - llamafactory.model.loader - all params: 7,505,973,248

View File

@ -1,10 +0,0 @@
{
"predict_bleu-4": 1.7100479999999998,
"predict_model_preparation_time": 0.0123,
"predict_rouge-1": 13.384794,
"predict_rouge-2": 2.707682,
"predict_rouge-l": 6.751074,
"predict_runtime": 6716.3545,
"predict_samples_per_second": 0.007,
"predict_steps_per_second": 0.004
}

File diff suppressed because one or more lines are too long

View File

@ -1,187 +0,0 @@
{"cur_time": "2024-09-13 06:10:27", "npu_power_dissipation": [{"npu_id": 2, "power_dissipation": }, {"npu_id": 3, "power_dissipation": }, {"npu_id": 5, "power_dissipation": }, {"npu_id": 6, "power_dissipation": }], "device_mem_usage": [{"npu_id": 2, "chip_id": 0, "device_id": 0, "mem_usage": 1692}, {"npu_id": 2, "chip_id": 1, "device_id": 1, "mem_usage": 1177}, {"npu_id": 3, "chip_id": 0, "device_id": 2, "mem_usage": 1819}, {"npu_id": 3, "chip_id": 1, "device_id": 3, "mem_usage": 1050}, {"npu_id": 5, "chip_id": 0, "device_id": 4, "mem_usage": 1483}, {"npu_id": 5, "chip_id": 1, "device_id": 5, "mem_usage": 1384}, {"npu_id": 6, "chip_id": 0, "device_id": 6, "mem_usage": 1340}, {"npu_id": 6, "chip_id": 1, "device_id": 7, "mem_usage": 1527}]}
{"cur_time": "2024-09-13 06:11:41", "npu_power_dissipation": [{"npu_id": 2, "power_dissipation": }, {"npu_id": 3, "power_dissipation": }, {"npu_id": 5, "power_dissipation": }, {"npu_id": 6, "power_dissipation": }], "device_mem_usage": [{"npu_id": 2, "chip_id": 0, "device_id": 0, "mem_usage": 8799
|}, {"npu_id": 2, "chip_id": 1, "device_id": 1, "mem_usage": 1176}, {"npu_id": 3, "chip_id": 0, "device_id": 2, "mem_usage": 1819}, {"npu_id": 3, "chip_id": 1, "device_id": 3, "mem_usage": 1050}, {"npu_id": 5, "chip_id": 0, "device_id": 4, "mem_usage": 1483}, {"npu_id": 5, "chip_id": 1, "device_id": 5, "mem_usage": 1384}, {"npu_id": 6, "chip_id": 0, "device_id": 6, "mem_usage": 1340}, {"npu_id": 6, "chip_id": 1, "device_id": 7, "mem_usage": 1527}]}
{"cur_time": "2024-09-13 06:12:54", "npu_power_dissipation": [{"npu_id": 2, "power_dissipation": }, {"npu_id": 3, "power_dissipation": }, {"npu_id": 5, "power_dissipation": }, {"npu_id": 6, "power_dissipation": }], "device_mem_usage": [{"npu_id": 2, "chip_id": 0, "device_id": 0, "mem_usage": 18802/
|}, {"npu_id": 2, "chip_id": 1, "device_id": 1, "mem_usage": 1176}, {"npu_id": 3, "chip_id": 0, "device_id": 2, "mem_usage": 1820}, {"npu_id": 3, "chip_id": 1, "device_id": 3, "mem_usage": 1050}, {"npu_id": 5, "chip_id": 0, "device_id": 4, "mem_usage": 1483}, {"npu_id": 5, "chip_id": 1, "device_id": 5, "mem_usage": 1383}, {"npu_id": 6, "chip_id": 0, "device_id": 6, "mem_usage": 1340}, {"npu_id": 6, "chip_id": 1, "device_id": 7, "mem_usage": 1527}]}
{"cur_time": "2024-09-13 06:14:07", "npu_power_dissipation": [{"npu_id": 2, "power_dissipation": }, {"npu_id": 3, "power_dissipation": }, {"npu_id": 5, "power_dissipation": }, {"npu_id": 6, "power_dissipation": }], "device_mem_usage": [{"npu_id": 2, "chip_id": 0, "device_id": 0, "mem_usage": 18803/
|}, {"npu_id": 2, "chip_id": 1, "device_id": 1, "mem_usage": 1175}, {"npu_id": 3, "chip_id": 0, "device_id": 2, "mem_usage": 1819}, {"npu_id": 3, "chip_id": 1, "device_id": 3, "mem_usage": 1050}, {"npu_id": 5, "chip_id": 0, "device_id": 4, "mem_usage": 1483}, {"npu_id": 5, "chip_id": 1, "device_id": 5, "mem_usage": 1384}, {"npu_id": 6, "chip_id": 0, "device_id": 6, "mem_usage": 1340}, {"npu_id": 6, "chip_id": 1, "device_id": 7, "mem_usage": 1527}]}
{"cur_time": "2024-09-13 06:15:21", "npu_power_dissipation": [{"npu_id": 2, "power_dissipation": }, {"npu_id": 3, "power_dissipation": }, {"npu_id": 5, "power_dissipation": }, {"npu_id": 6, "power_dissipation": }], "device_mem_usage": [{"npu_id": 2, "chip_id": 0, "device_id": 0, "mem_usage": 19787/
|}, {"npu_id": 2, "chip_id": 1, "device_id": 1, "mem_usage": 1176}, {"npu_id": 3, "chip_id": 0, "device_id": 2, "mem_usage": 1819}, {"npu_id": 3, "chip_id": 1, "device_id": 3, "mem_usage": 1050}, {"npu_id": 5, "chip_id": 0, "device_id": 4, "mem_usage": 1483}, {"npu_id": 5, "chip_id": 1, "device_id": 5, "mem_usage": 1384}, {"npu_id": 6, "chip_id": 0, "device_id": 6, "mem_usage": 1341}, {"npu_id": 6, "chip_id": 1, "device_id": 7, "mem_usage": 1527}]}
{"cur_time": "2024-09-13 06:16:34", "npu_power_dissipation": [{"npu_id": 2, "power_dissipation": }, {"npu_id": 3, "power_dissipation": }, {"npu_id": 5, "power_dissipation": }, {"npu_id": 6, "power_dissipation": }], "device_mem_usage": [{"npu_id": 2, "chip_id": 0, "device_id": 0, "mem_usage": 19787/
|}, {"npu_id": 2, "chip_id": 1, "device_id": 1, "mem_usage": 1175}, {"npu_id": 3, "chip_id": 0, "device_id": 2, "mem_usage": 1819}, {"npu_id": 3, "chip_id": 1, "device_id": 3, "mem_usage": 1050}, {"npu_id": 5, "chip_id": 0, "device_id": 4, "mem_usage": 1483}, {"npu_id": 5, "chip_id": 1, "device_id": 5, "mem_usage": 1384}, {"npu_id": 6, "chip_id": 0, "device_id": 6, "mem_usage": 1340}, {"npu_id": 6, "chip_id": 1, "device_id": 7, "mem_usage": 1527}]}
{"cur_time": "2024-09-13 06:17:47", "npu_power_dissipation": [{"npu_id": 2, "power_dissipation": }, {"npu_id": 3, "power_dissipation": }, {"npu_id": 5, "power_dissipation": }, {"npu_id": 6, "power_dissipation": }], "device_mem_usage": [{"npu_id": 2, "chip_id": 0, "device_id": 0, "mem_usage": 20771/
|}, {"npu_id": 2, "chip_id": 1, "device_id": 1, "mem_usage": 1176}, {"npu_id": 3, "chip_id": 0, "device_id": 2, "mem_usage": 1819}, {"npu_id": 3, "chip_id": 1, "device_id": 3, "mem_usage": 1050}, {"npu_id": 5, "chip_id": 0, "device_id": 4, "mem_usage": 1483}, {"npu_id": 5, "chip_id": 1, "device_id": 5, "mem_usage": 1384}, {"npu_id": 6, "chip_id": 0, "device_id": 6, "mem_usage": 1340}, {"npu_id": 6, "chip_id": 1, "device_id": 7, "mem_usage": 1527}]}
{"cur_time": "2024-09-13 06:19:01", "npu_power_dissipation": [{"npu_id": 2, "power_dissipation": }, {"npu_id": 3, "power_dissipation": }, {"npu_id": 5, "power_dissipation": }, {"npu_id": 6, "power_dissipation": }], "device_mem_usage": [{"npu_id": 2, "chip_id": 0, "device_id": 0, "mem_usage": 20782/
|}, {"npu_id": 2, "chip_id": 1, "device_id": 1, "mem_usage": 1176}, {"npu_id": 3, "chip_id": 0, "device_id": 2, "mem_usage": 1819}, {"npu_id": 3, "chip_id": 1, "device_id": 3, "mem_usage": 1050}, {"npu_id": 5, "chip_id": 0, "device_id": 4, "mem_usage": 1483}, {"npu_id": 5, "chip_id": 1, "device_id": 5, "mem_usage": 1383}, {"npu_id": 6, "chip_id": 0, "device_id": 6, "mem_usage": 1340}, {"npu_id": 6, "chip_id": 1, "device_id": 7, "mem_usage": 1527}]}
{"cur_time": "2024-09-13 06:20:14", "npu_power_dissipation": [{"npu_id": 2, "power_dissipation": }, {"npu_id": 3, "power_dissipation": }, {"npu_id": 5, "power_dissipation": }, {"npu_id": 6, "power_dissipation": }], "device_mem_usage": [{"npu_id": 2, "chip_id": 0, "device_id": 0, "mem_usage": 20783/
|}, {"npu_id": 2, "chip_id": 1, "device_id": 1, "mem_usage": 1176}, {"npu_id": 3, "chip_id": 0, "device_id": 2, "mem_usage": 1819}, {"npu_id": 3, "chip_id": 1, "device_id": 3, "mem_usage": 1050}, {"npu_id": 5, "chip_id": 0, "device_id": 4, "mem_usage": 1483}, {"npu_id": 5, "chip_id": 1, "device_id": 5, "mem_usage": 1384}, {"npu_id": 6, "chip_id": 0, "device_id": 6, "mem_usage": 1340}, {"npu_id": 6, "chip_id": 1, "device_id": 7, "mem_usage": 1527}]}
{"cur_time": "2024-09-13 06:21:27", "npu_power_dissipation": [{"npu_id": 2, "power_dissipation": }, {"npu_id": 3, "power_dissipation": }, {"npu_id": 5, "power_dissipation": }, {"npu_id": 6, "power_dissipation": }], "device_mem_usage": [{"npu_id": 2, "chip_id": 0, "device_id": 0, "mem_usage": 20782/
|}, {"npu_id": 2, "chip_id": 1, "device_id": 1, "mem_usage": 1176}, {"npu_id": 3, "chip_id": 0, "device_id": 2, "mem_usage": 1819}, {"npu_id": 3, "chip_id": 1, "device_id": 3, "mem_usage": 1050}, {"npu_id": 5, "chip_id": 0, "device_id": 4, "mem_usage": 1483}, {"npu_id": 5, "chip_id": 1, "device_id": 5, "mem_usage": 1383}, {"npu_id": 6, "chip_id": 0, "device_id": 6, "mem_usage": 1340}, {"npu_id": 6, "chip_id": 1, "device_id": 7, "mem_usage": 1527}]}
{"cur_time": "2024-09-13 06:22:41", "npu_power_dissipation": [{"npu_id": 2, "power_dissipation": }, {"npu_id": 3, "power_dissipation": }, {"npu_id": 5, "power_dissipation": }, {"npu_id": 6, "power_dissipation": }], "device_mem_usage": [{"npu_id": 2, "chip_id": 0, "device_id": 0, "mem_usage": 20782/
|}, {"npu_id": 2, "chip_id": 1, "device_id": 1, "mem_usage": 1176}, {"npu_id": 3, "chip_id": 0, "device_id": 2, "mem_usage": 1819}, {"npu_id": 3, "chip_id": 1, "device_id": 3, "mem_usage": 1050}, {"npu_id": 5, "chip_id": 0, "device_id": 4, "mem_usage": 1483}, {"npu_id": 5, "chip_id": 1, "device_id": 5, "mem_usage": 1384}, {"npu_id": 6, "chip_id": 0, "device_id": 6, "mem_usage": 1340}, {"npu_id": 6, "chip_id": 1, "device_id": 7, "mem_usage": 1527}]}
{"cur_time": "2024-09-13 06:23:54", "npu_power_dissipation": [{"npu_id": 2, "power_dissipation": }, {"npu_id": 3, "power_dissipation": }, {"npu_id": 5, "power_dissipation": }, {"npu_id": 6, "power_dissipation": }], "device_mem_usage": [{"npu_id": 2, "chip_id": 0, "device_id": 0, "mem_usage": 20782/
|}, {"npu_id": 2, "chip_id": 1, "device_id": 1, "mem_usage": 1176}, {"npu_id": 3, "chip_id": 0, "device_id": 2, "mem_usage": 1819}, {"npu_id": 3, "chip_id": 1, "device_id": 3, "mem_usage": 1050}, {"npu_id": 5, "chip_id": 0, "device_id": 4, "mem_usage": 1483}, {"npu_id": 5, "chip_id": 1, "device_id": 5, "mem_usage": 1383}, {"npu_id": 6, "chip_id": 0, "device_id": 6, "mem_usage": 1340}, {"npu_id": 6, "chip_id": 1, "device_id": 7, "mem_usage": 1528}]}
{"cur_time": "2024-09-13 06:25:07", "npu_power_dissipation": [{"npu_id": 2, "power_dissipation": }, {"npu_id": 3, "power_dissipation": }, {"npu_id": 5, "power_dissipation": }, {"npu_id": 6, "power_dissipation": }], "device_mem_usage": [{"npu_id": 2, "chip_id": 0, "device_id": 0, "mem_usage": 20783/
|}, {"npu_id": 2, "chip_id": 1, "device_id": 1, "mem_usage": 1176}, {"npu_id": 3, "chip_id": 0, "device_id": 2, "mem_usage": 1819}, {"npu_id": 3, "chip_id": 1, "device_id": 3, "mem_usage": 1050}, {"npu_id": 5, "chip_id": 0, "device_id": 4, "mem_usage": 1483}, {"npu_id": 5, "chip_id": 1, "device_id": 5, "mem_usage": 1384}, {"npu_id": 6, "chip_id": 0, "device_id": 6, "mem_usage": 1340}, {"npu_id": 6, "chip_id": 1, "device_id": 7, "mem_usage": 1527}]}
{"cur_time": "2024-09-13 06:26:21", "npu_power_dissipation": [{"npu_id": 2, "power_dissipation": }, {"npu_id": 3, "power_dissipation": }, {"npu_id": 5, "power_dissipation": }, {"npu_id": 6, "power_dissipation": }], "device_mem_usage": [{"npu_id": 2, "chip_id": 0, "device_id": 0, "mem_usage": 20783/
|}, {"npu_id": 2, "chip_id": 1, "device_id": 1, "mem_usage": 1175}, {"npu_id": 3, "chip_id": 0, "device_id": 2, "mem_usage": 1819}, {"npu_id": 3, "chip_id": 1, "device_id": 3, "mem_usage": 1050}, {"npu_id": 5, "chip_id": 0, "device_id": 4, "mem_usage": 1483}, {"npu_id": 5, "chip_id": 1, "device_id": 5, "mem_usage": 1383}, {"npu_id": 6, "chip_id": 0, "device_id": 6, "mem_usage": 1340}, {"npu_id": 6, "chip_id": 1, "device_id": 7, "mem_usage": 1527}]}
{"cur_time": "2024-09-13 06:27:34", "npu_power_dissipation": [{"npu_id": 2, "power_dissipation": }, {"npu_id": 3, "power_dissipation": }, {"npu_id": 5, "power_dissipation": }, {"npu_id": 6, "power_dissipation": }], "device_mem_usage": [{"npu_id": 2, "chip_id": 0, "device_id": 0, "mem_usage": 20783/
|}, {"npu_id": 2, "chip_id": 1, "device_id": 1, "mem_usage": 1176}, {"npu_id": 3, "chip_id": 0, "device_id": 2, "mem_usage": 1820}, {"npu_id": 3, "chip_id": 1, "device_id": 3, "mem_usage": 1050}, {"npu_id": 5, "chip_id": 0, "device_id": 4, "mem_usage": 1483}, {"npu_id": 5, "chip_id": 1, "device_id": 5, "mem_usage": 1383}, {"npu_id": 6, "chip_id": 0, "device_id": 6, "mem_usage": 1340}, {"npu_id": 6, "chip_id": 1, "device_id": 7, "mem_usage": 1527}]}
{"cur_time": "2024-09-13 06:28:47", "npu_power_dissipation": [{"npu_id": 2, "power_dissipation": }, {"npu_id": 3, "power_dissipation": }, {"npu_id": 5, "power_dissipation": }, {"npu_id": 6, "power_dissipation": }], "device_mem_usage": [{"npu_id": 2, "chip_id": 0, "device_id": 0, "mem_usage": 20783/
|}, {"npu_id": 2, "chip_id": 1, "device_id": 1, "mem_usage": 1175}, {"npu_id": 3, "chip_id": 0, "device_id": 2, "mem_usage": 1819}, {"npu_id": 3, "chip_id": 1, "device_id": 3, "mem_usage": 1050}, {"npu_id": 5, "chip_id": 0, "device_id": 4, "mem_usage": 1483}, {"npu_id": 5, "chip_id": 1, "device_id": 5, "mem_usage": 1383}, {"npu_id": 6, "chip_id": 0, "device_id": 6, "mem_usage": 1340}, {"npu_id": 6, "chip_id": 1, "device_id": 7, "mem_usage": 1527}]}
{"cur_time": "2024-09-13 06:30:00", "npu_power_dissipation": [{"npu_id": 2, "power_dissipation": }, {"npu_id": 3, "power_dissipation": }, {"npu_id": 5, "power_dissipation": }, {"npu_id": 6, "power_dissipation": }], "device_mem_usage": [{"npu_id": 2, "chip_id": 0, "device_id": 0, "mem_usage": 20783/
|}, {"npu_id": 2, "chip_id": 1, "device_id": 1, "mem_usage": 1176}, {"npu_id": 3, "chip_id": 0, "device_id": 2, "mem_usage": 1820}, {"npu_id": 3, "chip_id": 1, "device_id": 3, "mem_usage": 1050}, {"npu_id": 5, "chip_id": 0, "device_id": 4, "mem_usage": 1483}, {"npu_id": 5, "chip_id": 1, "device_id": 5, "mem_usage": 1383}, {"npu_id": 6, "chip_id": 0, "device_id": 6, "mem_usage": 1340}, {"npu_id": 6, "chip_id": 1, "device_id": 7, "mem_usage": 1527}]}
{"cur_time": "2024-09-13 06:31:14", "npu_power_dissipation": [{"npu_id": 2, "power_dissipation": }, {"npu_id": 3, "power_dissipation": }, {"npu_id": 5, "power_dissipation": }, {"npu_id": 6, "power_dissipation": }], "device_mem_usage": [{"npu_id": 2, "chip_id": 0, "device_id": 0, "mem_usage": 20783/
|}, {"npu_id": 2, "chip_id": 1, "device_id": 1, "mem_usage": 1175}, {"npu_id": 3, "chip_id": 0, "device_id": 2, "mem_usage": 1819}, {"npu_id": 3, "chip_id": 1, "device_id": 3, "mem_usage": 1050}, {"npu_id": 5, "chip_id": 0, "device_id": 4, "mem_usage": 1484}, {"npu_id": 5, "chip_id": 1, "device_id": 5, "mem_usage": 1383}, {"npu_id": 6, "chip_id": 0, "device_id": 6, "mem_usage": 1340}, {"npu_id": 6, "chip_id": 1, "device_id": 7, "mem_usage": 1527}]}
{"cur_time": "2024-09-13 06:32:27", "npu_power_dissipation": [{"npu_id": 2, "power_dissipation": }, {"npu_id": 3, "power_dissipation": }, {"npu_id": 5, "power_dissipation": }, {"npu_id": 6, "power_dissipation": }], "device_mem_usage": [{"npu_id": 2, "chip_id": 0, "device_id": 0, "mem_usage": 20782/
|}, {"npu_id": 2, "chip_id": 1, "device_id": 1, "mem_usage": 1175}, {"npu_id": 3, "chip_id": 0, "device_id": 2, "mem_usage": 1819}, {"npu_id": 3, "chip_id": 1, "device_id": 3, "mem_usage": 1050}, {"npu_id": 5, "chip_id": 0, "device_id": 4, "mem_usage": 1483}, {"npu_id": 5, "chip_id": 1, "device_id": 5, "mem_usage": 1383}, {"npu_id": 6, "chip_id": 0, "device_id": 6, "mem_usage": 1340}, {"npu_id": 6, "chip_id": 1, "device_id": 7, "mem_usage": 1527}]}
{"cur_time": "2024-09-13 06:33:41", "npu_power_dissipation": [{"npu_id": 2, "power_dissipation": }, {"npu_id": 3, "power_dissipation": }, {"npu_id": 5, "power_dissipation": }, {"npu_id": 6, "power_dissipation": }], "device_mem_usage": [{"npu_id": 2, "chip_id": 0, "device_id": 0, "mem_usage": 20783/
|}, {"npu_id": 2, "chip_id": 1, "device_id": 1, "mem_usage": 1176}, {"npu_id": 3, "chip_id": 0, "device_id": 2, "mem_usage": 1820}, {"npu_id": 3, "chip_id": 1, "device_id": 3, "mem_usage": 1049}, {"npu_id": 5, "chip_id": 0, "device_id": 4, "mem_usage": 1484}, {"npu_id": 5, "chip_id": 1, "device_id": 5, "mem_usage": 1382}, {"npu_id": 6, "chip_id": 0, "device_id": 6, "mem_usage": 1340}, {"npu_id": 6, "chip_id": 1, "device_id": 7, "mem_usage": 1527}]}
{"cur_time": "2024-09-13 06:34:54", "npu_power_dissipation": [{"npu_id": 2, "power_dissipation": }, {"npu_id": 3, "power_dissipation": }, {"npu_id": 5, "power_dissipation": }, {"npu_id": 6, "power_dissipation": }], "device_mem_usage": [{"npu_id": 2, "chip_id": 0, "device_id": 0, "mem_usage": 20783/
|}, {"npu_id": 2, "chip_id": 1, "device_id": 1, "mem_usage": 1176}, {"npu_id": 3, "chip_id": 0, "device_id": 2, "mem_usage": 1819}, {"npu_id": 3, "chip_id": 1, "device_id": 3, "mem_usage": 1049}, {"npu_id": 5, "chip_id": 0, "device_id": 4, "mem_usage": 1484}, {"npu_id": 5, "chip_id": 1, "device_id": 5, "mem_usage": 1383}, {"npu_id": 6, "chip_id": 0, "device_id": 6, "mem_usage": 1340}, {"npu_id": 6, "chip_id": 1, "device_id": 7, "mem_usage": 1527}]}
{"cur_time": "2024-09-13 06:36:07", "npu_power_dissipation": [{"npu_id": 2, "power_dissipation": }, {"npu_id": 3, "power_dissipation": }, {"npu_id": 5, "power_dissipation": }, {"npu_id": 6, "power_dissipation": }], "device_mem_usage": [{"npu_id": 2, "chip_id": 0, "device_id": 0, "mem_usage": 20783/
|}, {"npu_id": 2, "chip_id": 1, "device_id": 1, "mem_usage": 1176}, {"npu_id": 3, "chip_id": 0, "device_id": 2, "mem_usage": 1819}, {"npu_id": 3, "chip_id": 1, "device_id": 3, "mem_usage": 1050}, {"npu_id": 5, "chip_id": 0, "device_id": 4, "mem_usage": 1484}, {"npu_id": 5, "chip_id": 1, "device_id": 5, "mem_usage": 1382}, {"npu_id": 6, "chip_id": 0, "device_id": 6, "mem_usage": 1340}, {"npu_id": 6, "chip_id": 1, "device_id": 7, "mem_usage": 1527}]}
{"cur_time": "2024-09-13 06:37:21", "npu_power_dissipation": [{"npu_id": 2, "power_dissipation": }, {"npu_id": 3, "power_dissipation": }, {"npu_id": 5, "power_dissipation": }, {"npu_id": 6, "power_dissipation": }], "device_mem_usage": [{"npu_id": 2, "chip_id": 0, "device_id": 0, "mem_usage": 20782/
|}, {"npu_id": 2, "chip_id": 1, "device_id": 1, "mem_usage": 1176}, {"npu_id": 3, "chip_id": 0, "device_id": 2, "mem_usage": 1820}, {"npu_id": 3, "chip_id": 1, "device_id": 3, "mem_usage": 1050}, {"npu_id": 5, "chip_id": 0, "device_id": 4, "mem_usage": 1484}, {"npu_id": 5, "chip_id": 1, "device_id": 5, "mem_usage": 1383}, {"npu_id": 6, "chip_id": 0, "device_id": 6, "mem_usage": 1340}, {"npu_id": 6, "chip_id": 1, "device_id": 7, "mem_usage": 1527}]}
{"cur_time": "2024-09-13 06:38:34", "npu_power_dissipation": [{"npu_id": 2, "power_dissipation": }, {"npu_id": 3, "power_dissipation": }, {"npu_id": 5, "power_dissipation": }, {"npu_id": 6, "power_dissipation": }], "device_mem_usage": [{"npu_id": 2, "chip_id": 0, "device_id": 0, "mem_usage": 20783/
|}, {"npu_id": 2, "chip_id": 1, "device_id": 1, "mem_usage": 1176}, {"npu_id": 3, "chip_id": 0, "device_id": 2, "mem_usage": 1820}, {"npu_id": 3, "chip_id": 1, "device_id": 3, "mem_usage": 1049}, {"npu_id": 5, "chip_id": 0, "device_id": 4, "mem_usage": 1484}, {"npu_id": 5, "chip_id": 1, "device_id": 5, "mem_usage": 1383}, {"npu_id": 6, "chip_id": 0, "device_id": 6, "mem_usage": 1340}, {"npu_id": 6, "chip_id": 1, "device_id": 7, "mem_usage": 1527}]}
{"cur_time": "2024-09-13 06:39:47", "npu_power_dissipation": [{"npu_id": 2, "power_dissipation": }, {"npu_id": 3, "power_dissipation": }, {"npu_id": 5, "power_dissipation": }, {"npu_id": 6, "power_dissipation": }], "device_mem_usage": [{"npu_id": 2, "chip_id": 0, "device_id": 0, "mem_usage": 20783/
|}, {"npu_id": 2, "chip_id": 1, "device_id": 1, "mem_usage": 1176}, {"npu_id": 3, "chip_id": 0, "device_id": 2, "mem_usage": 1820}, {"npu_id": 3, "chip_id": 1, "device_id": 3, "mem_usage": 1049}, {"npu_id": 5, "chip_id": 0, "device_id": 4, "mem_usage": 1483}, {"npu_id": 5, "chip_id": 1, "device_id": 5, "mem_usage": 1383}, {"npu_id": 6, "chip_id": 0, "device_id": 6, "mem_usage": 1340}, {"npu_id": 6, "chip_id": 1, "device_id": 7, "mem_usage": 1527}]}
{"cur_time": "2024-09-13 06:41:00", "npu_power_dissipation": [{"npu_id": 2, "power_dissipation": }, {"npu_id": 3, "power_dissipation": }, {"npu_id": 5, "power_dissipation": }, {"npu_id": 6, "power_dissipation": }], "device_mem_usage": [{"npu_id": 2, "chip_id": 0, "device_id": 0, "mem_usage": 20783/
|}, {"npu_id": 2, "chip_id": 1, "device_id": 1, "mem_usage": 1176}, {"npu_id": 3, "chip_id": 0, "device_id": 2, "mem_usage": 1819}, {"npu_id": 3, "chip_id": 1, "device_id": 3, "mem_usage": 1050}, {"npu_id": 5, "chip_id": 0, "device_id": 4, "mem_usage": 1484}, {"npu_id": 5, "chip_id": 1, "device_id": 5, "mem_usage": 1382}, {"npu_id": 6, "chip_id": 0, "device_id": 6, "mem_usage": 1340}, {"npu_id": 6, "chip_id": 1, "device_id": 7, "mem_usage": 1527}]}
{"cur_time": "2024-09-13 06:42:14", "npu_power_dissipation": [{"npu_id": 2, "power_dissipation": }, {"npu_id": 3, "power_dissipation": }, {"npu_id": 5, "power_dissipation": }, {"npu_id": 6, "power_dissipation": }], "device_mem_usage": [{"npu_id": 2, "chip_id": 0, "device_id": 0, "mem_usage": 20783/
|}, {"npu_id": 2, "chip_id": 1, "device_id": 1, "mem_usage": 1176}, {"npu_id": 3, "chip_id": 0, "device_id": 2, "mem_usage": 1820}, {"npu_id": 3, "chip_id": 1, "device_id": 3, "mem_usage": 1050}, {"npu_id": 5, "chip_id": 0, "device_id": 4, "mem_usage": 1483}, {"npu_id": 5, "chip_id": 1, "device_id": 5, "mem_usage": 1383}, {"npu_id": 6, "chip_id": 0, "device_id": 6, "mem_usage": 1340}, {"npu_id": 6, "chip_id": 1, "device_id": 7, "mem_usage": 1527}]}
{"cur_time": "2024-09-13 06:43:27", "npu_power_dissipation": [{"npu_id": 2, "power_dissipation": }, {"npu_id": 3, "power_dissipation": }, {"npu_id": 5, "power_dissipation": }, {"npu_id": 6, "power_dissipation": }], "device_mem_usage": [{"npu_id": 2, "chip_id": 0, "device_id": 0, "mem_usage": 20783/
|}, {"npu_id": 2, "chip_id": 1, "device_id": 1, "mem_usage": 1175}, {"npu_id": 3, "chip_id": 0, "device_id": 2, "mem_usage": 1819}, {"npu_id": 3, "chip_id": 1, "device_id": 3, "mem_usage": 1050}, {"npu_id": 5, "chip_id": 0, "device_id": 4, "mem_usage": 1484}, {"npu_id": 5, "chip_id": 1, "device_id": 5, "mem_usage": 1382}, {"npu_id": 6, "chip_id": 0, "device_id": 6, "mem_usage": 1340}, {"npu_id": 6, "chip_id": 1, "device_id": 7, "mem_usage": 1527}]}
{"cur_time": "2024-09-13 06:44:40", "npu_power_dissipation": [{"npu_id": 2, "power_dissipation": }, {"npu_id": 3, "power_dissipation": }, {"npu_id": 5, "power_dissipation": }, {"npu_id": 6, "power_dissipation": }], "device_mem_usage": [{"npu_id": 2, "chip_id": 0, "device_id": 0, "mem_usage": 20783/
|}, {"npu_id": 2, "chip_id": 1, "device_id": 1, "mem_usage": 1176}, {"npu_id": 3, "chip_id": 0, "device_id": 2, "mem_usage": 1820}, {"npu_id": 3, "chip_id": 1, "device_id": 3, "mem_usage": 1050}, {"npu_id": 5, "chip_id": 0, "device_id": 4, "mem_usage": 1485}, {"npu_id": 5, "chip_id": 1, "device_id": 5, "mem_usage": 1382}, {"npu_id": 6, "chip_id": 0, "device_id": 6, "mem_usage": 1340}, {"npu_id": 6, "chip_id": 1, "device_id": 7, "mem_usage": 1527}]}
{"cur_time": "2024-09-13 06:45:54", "npu_power_dissipation": [{"npu_id": 2, "power_dissipation": }, {"npu_id": 3, "power_dissipation": }, {"npu_id": 5, "power_dissipation": }, {"npu_id": 6, "power_dissipation": }], "device_mem_usage": [{"npu_id": 2, "chip_id": 0, "device_id": 0, "mem_usage": 20782/
|}, {"npu_id": 2, "chip_id": 1, "device_id": 1, "mem_usage": 1176}, {"npu_id": 3, "chip_id": 0, "device_id": 2, "mem_usage": 1819}, {"npu_id": 3, "chip_id": 1, "device_id": 3, "mem_usage": 1050}, {"npu_id": 5, "chip_id": 0, "device_id": 4, "mem_usage": 1484}, {"npu_id": 5, "chip_id": 1, "device_id": 5, "mem_usage": 1382}, {"npu_id": 6, "chip_id": 0, "device_id": 6, "mem_usage": 1340}, {"npu_id": 6, "chip_id": 1, "device_id": 7, "mem_usage": 1528}]}
{"cur_time": "2024-09-13 06:47:07", "npu_power_dissipation": [{"npu_id": 2, "power_dissipation": }, {"npu_id": 3, "power_dissipation": }, {"npu_id": 5, "power_dissipation": }, {"npu_id": 6, "power_dissipation": }], "device_mem_usage": [{"npu_id": 2, "chip_id": 0, "device_id": 0, "mem_usage": 20783/
|}, {"npu_id": 2, "chip_id": 1, "device_id": 1, "mem_usage": 1175}, {"npu_id": 3, "chip_id": 0, "device_id": 2, "mem_usage": 1819}, {"npu_id": 3, "chip_id": 1, "device_id": 3, "mem_usage": 1050}, {"npu_id": 5, "chip_id": 0, "device_id": 4, "mem_usage": 1484}, {"npu_id": 5, "chip_id": 1, "device_id": 5, "mem_usage": 1382}, {"npu_id": 6, "chip_id": 0, "device_id": 6, "mem_usage": 1340}, {"npu_id": 6, "chip_id": 1, "device_id": 7, "mem_usage": 1527}]}
{"cur_time": "2024-09-13 06:48:20", "npu_power_dissipation": [{"npu_id": 2, "power_dissipation": }, {"npu_id": 3, "power_dissipation": }, {"npu_id": 5, "power_dissipation": }, {"npu_id": 6, "power_dissipation": }], "device_mem_usage": [{"npu_id": 2, "chip_id": 0, "device_id": 0, "mem_usage": 20785/
|}, {"npu_id": 2, "chip_id": 1, "device_id": 1, "mem_usage": 1175}, {"npu_id": 3, "chip_id": 0, "device_id": 2, "mem_usage": 1820}, {"npu_id": 3, "chip_id": 1, "device_id": 3, "mem_usage": 1050}, {"npu_id": 5, "chip_id": 0, "device_id": 4, "mem_usage": 1485}, {"npu_id": 5, "chip_id": 1, "device_id": 5, "mem_usage": 1382}, {"npu_id": 6, "chip_id": 0, "device_id": 6, "mem_usage": 1340}, {"npu_id": 6, "chip_id": 1, "device_id": 7, "mem_usage": 1527}]}
{"cur_time": "2024-09-13 06:49:34", "npu_power_dissipation": [{"npu_id": 2, "power_dissipation": }, {"npu_id": 3, "power_dissipation": }, {"npu_id": 5, "power_dissipation": }, {"npu_id": 6, "power_dissipation": }], "device_mem_usage": [{"npu_id": 2, "chip_id": 0, "device_id": 0, "mem_usage": 20785/
|}, {"npu_id": 2, "chip_id": 1, "device_id": 1, "mem_usage": 1176}, {"npu_id": 3, "chip_id": 0, "device_id": 2, "mem_usage": 1820}, {"npu_id": 3, "chip_id": 1, "device_id": 3, "mem_usage": 1050}, {"npu_id": 5, "chip_id": 0, "device_id": 4, "mem_usage": 1484}, {"npu_id": 5, "chip_id": 1, "device_id": 5, "mem_usage": 1382}, {"npu_id": 6, "chip_id": 0, "device_id": 6, "mem_usage": 1340}, {"npu_id": 6, "chip_id": 1, "device_id": 7, "mem_usage": 1527}]}
{"cur_time": "2024-09-13 06:50:47", "npu_power_dissipation": [{"npu_id": 2, "power_dissipation": }, {"npu_id": 3, "power_dissipation": }, {"npu_id": 5, "power_dissipation": }, {"npu_id": 6, "power_dissipation": }], "device_mem_usage": [{"npu_id": 2, "chip_id": 0, "device_id": 0, "mem_usage": 20785/
|}, {"npu_id": 2, "chip_id": 1, "device_id": 1, "mem_usage": 1176}, {"npu_id": 3, "chip_id": 0, "device_id": 2, "mem_usage": 1819}, {"npu_id": 3, "chip_id": 1, "device_id": 3, "mem_usage": 1050}, {"npu_id": 5, "chip_id": 0, "device_id": 4, "mem_usage": 1485}, {"npu_id": 5, "chip_id": 1, "device_id": 5, "mem_usage": 1382}, {"npu_id": 6, "chip_id": 0, "device_id": 6, "mem_usage": 1340}, {"npu_id": 6, "chip_id": 1, "device_id": 7, "mem_usage": 1527}]}
{"cur_time": "2024-09-13 06:52:00", "npu_power_dissipation": [{"npu_id": 2, "power_dissipation": }, {"npu_id": 3, "power_dissipation": }, {"npu_id": 5, "power_dissipation": }, {"npu_id": 6, "power_dissipation": }], "device_mem_usage": [{"npu_id": 2, "chip_id": 0, "device_id": 0, "mem_usage": 20785/
|}, {"npu_id": 2, "chip_id": 1, "device_id": 1, "mem_usage": 1175}, {"npu_id": 3, "chip_id": 0, "device_id": 2, "mem_usage": 1819}, {"npu_id": 3, "chip_id": 1, "device_id": 3, "mem_usage": 1050}, {"npu_id": 5, "chip_id": 0, "device_id": 4, "mem_usage": 1485}, {"npu_id": 5, "chip_id": 1, "device_id": 5, "mem_usage": 1382}, {"npu_id": 6, "chip_id": 0, "device_id": 6, "mem_usage": 1340}, {"npu_id": 6, "chip_id": 1, "device_id": 7, "mem_usage": 1527}]}
{"cur_time": "2024-09-13 06:53:14", "npu_power_dissipation": [{"npu_id": 2, "power_dissipation": }, {"npu_id": 3, "power_dissipation": }, {"npu_id": 5, "power_dissipation": }, {"npu_id": 6, "power_dissipation": }], "device_mem_usage": [{"npu_id": 2, "chip_id": 0, "device_id": 0, "mem_usage": 20785/
|}, {"npu_id": 2, "chip_id": 1, "device_id": 1, "mem_usage": 1175}, {"npu_id": 3, "chip_id": 0, "device_id": 2, "mem_usage": 1820}, {"npu_id": 3, "chip_id": 1, "device_id": 3, "mem_usage": 1049}, {"npu_id": 5, "chip_id": 0, "device_id": 4, "mem_usage": 1484}, {"npu_id": 5, "chip_id": 1, "device_id": 5, "mem_usage": 1382}, {"npu_id": 6, "chip_id": 0, "device_id": 6, "mem_usage": 1340}, {"npu_id": 6, "chip_id": 1, "device_id": 7, "mem_usage": 1527}]}
{"cur_time": "2024-09-13 06:54:27", "npu_power_dissipation": [{"npu_id": 2, "power_dissipation": }, {"npu_id": 3, "power_dissipation": }, {"npu_id": 5, "power_dissipation": }, {"npu_id": 6, "power_dissipation": }], "device_mem_usage": [{"npu_id": 2, "chip_id": 0, "device_id": 0, "mem_usage": 20785/
|}, {"npu_id": 2, "chip_id": 1, "device_id": 1, "mem_usage": 1175}, {"npu_id": 3, "chip_id": 0, "device_id": 2, "mem_usage": 1819}, {"npu_id": 3, "chip_id": 1, "device_id": 3, "mem_usage": 1050}, {"npu_id": 5, "chip_id": 0, "device_id": 4, "mem_usage": 1485}, {"npu_id": 5, "chip_id": 1, "device_id": 5, "mem_usage": 1382}, {"npu_id": 6, "chip_id": 0, "device_id": 6, "mem_usage": 1340}, {"npu_id": 6, "chip_id": 1, "device_id": 7, "mem_usage": 1527}]}
{"cur_time": "2024-09-13 06:55:40", "npu_power_dissipation": [{"npu_id": 2, "power_dissipation": }, {"npu_id": 3, "power_dissipation": }, {"npu_id": 5, "power_dissipation": }, {"npu_id": 6, "power_dissipation": }], "device_mem_usage": [{"npu_id": 2, "chip_id": 0, "device_id": 0, "mem_usage": 20785/
|}, {"npu_id": 2, "chip_id": 1, "device_id": 1, "mem_usage": 1176}, {"npu_id": 3, "chip_id": 0, "device_id": 2, "mem_usage": 1819}, {"npu_id": 3, "chip_id": 1, "device_id": 3, "mem_usage": 1050}, {"npu_id": 5, "chip_id": 0, "device_id": 4, "mem_usage": 1485}, {"npu_id": 5, "chip_id": 1, "device_id": 5, "mem_usage": 1382}, {"npu_id": 6, "chip_id": 0, "device_id": 6, "mem_usage": 1340}, {"npu_id": 6, "chip_id": 1, "device_id": 7, "mem_usage": 1527}]}
{"cur_time": "2024-09-13 06:56:54", "npu_power_dissipation": [{"npu_id": 2, "power_dissipation": }, {"npu_id": 3, "power_dissipation": }, {"npu_id": 5, "power_dissipation": }, {"npu_id": 6, "power_dissipation": }], "device_mem_usage": [{"npu_id": 2, "chip_id": 0, "device_id": 0, "mem_usage": 20784/
|}, {"npu_id": 2, "chip_id": 1, "device_id": 1, "mem_usage": 1176}, {"npu_id": 3, "chip_id": 0, "device_id": 2, "mem_usage": 1819}, {"npu_id": 3, "chip_id": 1, "device_id": 3, "mem_usage": 1050}, {"npu_id": 5, "chip_id": 0, "device_id": 4, "mem_usage": 1484}, {"npu_id": 5, "chip_id": 1, "device_id": 5, "mem_usage": 1382}, {"npu_id": 6, "chip_id": 0, "device_id": 6, "mem_usage": 1340}, {"npu_id": 6, "chip_id": 1, "device_id": 7, "mem_usage": 1527}]}
{"cur_time": "2024-09-13 06:58:07", "npu_power_dissipation": [{"npu_id": 2, "power_dissipation": }, {"npu_id": 3, "power_dissipation": }, {"npu_id": 5, "power_dissipation": }, {"npu_id": 6, "power_dissipation": }], "device_mem_usage": [{"npu_id": 2, "chip_id": 0, "device_id": 0, "mem_usage": 20785/
|}, {"npu_id": 2, "chip_id": 1, "device_id": 1, "mem_usage": 1176}, {"npu_id": 3, "chip_id": 0, "device_id": 2, "mem_usage": 1819}, {"npu_id": 3, "chip_id": 1, "device_id": 3, "mem_usage": 1050}, {"npu_id": 5, "chip_id": 0, "device_id": 4, "mem_usage": 1485}, {"npu_id": 5, "chip_id": 1, "device_id": 5, "mem_usage": 1382}, {"npu_id": 6, "chip_id": 0, "device_id": 6, "mem_usage": 1340}, {"npu_id": 6, "chip_id": 1, "device_id": 7, "mem_usage": 1527}]}
{"cur_time": "2024-09-13 06:59:20", "npu_power_dissipation": [{"npu_id": 2, "power_dissipation": }, {"npu_id": 3, "power_dissipation": }, {"npu_id": 5, "power_dissipation": }, {"npu_id": 6, "power_dissipation": }], "device_mem_usage": [{"npu_id": 2, "chip_id": 0, "device_id": 0, "mem_usage": 20784/
|}, {"npu_id": 2, "chip_id": 1, "device_id": 1, "mem_usage": 1176}, {"npu_id": 3, "chip_id": 0, "device_id": 2, "mem_usage": 1819}, {"npu_id": 3, "chip_id": 1, "device_id": 3, "mem_usage": 1050}, {"npu_id": 5, "chip_id": 0, "device_id": 4, "mem_usage": 1485}, {"npu_id": 5, "chip_id": 1, "device_id": 5, "mem_usage": 1382}, {"npu_id": 6, "chip_id": 0, "device_id": 6, "mem_usage": 1340}, {"npu_id": 6, "chip_id": 1, "device_id": 7, "mem_usage": 1527}]}
{"cur_time": "2024-09-13 07:00:33", "npu_power_dissipation": [{"npu_id": 2, "power_dissipation": }, {"npu_id": 3, "power_dissipation": }, {"npu_id": 5, "power_dissipation": }, {"npu_id": 6, "power_dissipation": }], "device_mem_usage": [{"npu_id": 2, "chip_id": 0, "device_id": 0, "mem_usage": 20785/
|}, {"npu_id": 2, "chip_id": 1, "device_id": 1, "mem_usage": 1175}, {"npu_id": 3, "chip_id": 0, "device_id": 2, "mem_usage": 1820}, {"npu_id": 3, "chip_id": 1, "device_id": 3, "mem_usage": 1049}, {"npu_id": 5, "chip_id": 0, "device_id": 4, "mem_usage": 1485}, {"npu_id": 5, "chip_id": 1, "device_id": 5, "mem_usage": 1382}, {"npu_id": 6, "chip_id": 0, "device_id": 6, "mem_usage": 1340}, {"npu_id": 6, "chip_id": 1, "device_id": 7, "mem_usage": 1527}]}
{"cur_time": "2024-09-13 07:01:47", "npu_power_dissipation": [{"npu_id": 2, "power_dissipation": }, {"npu_id": 3, "power_dissipation": }, {"npu_id": 5, "power_dissipation": }, {"npu_id": 6, "power_dissipation": }], "device_mem_usage": [{"npu_id": 2, "chip_id": 0, "device_id": 0, "mem_usage": 20786/
|}, {"npu_id": 2, "chip_id": 1, "device_id": 1, "mem_usage": 1175}, {"npu_id": 3, "chip_id": 0, "device_id": 2, "mem_usage": 1820}, {"npu_id": 3, "chip_id": 1, "device_id": 3, "mem_usage": 1049}, {"npu_id": 5, "chip_id": 0, "device_id": 4, "mem_usage": 1485}, {"npu_id": 5, "chip_id": 1, "device_id": 5, "mem_usage": 1382}, {"npu_id": 6, "chip_id": 0, "device_id": 6, "mem_usage": 1340}, {"npu_id": 6, "chip_id": 1, "device_id": 7, "mem_usage": 1528}]}
{"cur_time": "2024-09-13 07:03:00", "npu_power_dissipation": [{"npu_id": 2, "power_dissipation": }, {"npu_id": 3, "power_dissipation": }, {"npu_id": 5, "power_dissipation": }, {"npu_id": 6, "power_dissipation": }], "device_mem_usage": [{"npu_id": 2, "chip_id": 0, "device_id": 0, "mem_usage": 20785/
|}, {"npu_id": 2, "chip_id": 1, "device_id": 1, "mem_usage": 1175}, {"npu_id": 3, "chip_id": 0, "device_id": 2, "mem_usage": 1820}, {"npu_id": 3, "chip_id": 1, "device_id": 3, "mem_usage": 1049}, {"npu_id": 5, "chip_id": 0, "device_id": 4, "mem_usage": 1485}, {"npu_id": 5, "chip_id": 1, "device_id": 5, "mem_usage": 1382}, {"npu_id": 6, "chip_id": 0, "device_id": 6, "mem_usage": 1340}, {"npu_id": 6, "chip_id": 1, "device_id": 7, "mem_usage": 1527}]}
{"cur_time": "2024-09-13 07:04:13", "npu_power_dissipation": [{"npu_id": 2, "power_dissipation": }, {"npu_id": 3, "power_dissipation": }, {"npu_id": 5, "power_dissipation": }, {"npu_id": 6, "power_dissipation": }], "device_mem_usage": [{"npu_id": 2, "chip_id": 0, "device_id": 0, "mem_usage": 20785/
|}, {"npu_id": 2, "chip_id": 1, "device_id": 1, "mem_usage": 1175}, {"npu_id": 3, "chip_id": 0, "device_id": 2, "mem_usage": 1819}, {"npu_id": 3, "chip_id": 1, "device_id": 3, "mem_usage": 1049}, {"npu_id": 5, "chip_id": 0, "device_id": 4, "mem_usage": 1485}, {"npu_id": 5, "chip_id": 1, "device_id": 5, "mem_usage": 1382}, {"npu_id": 6, "chip_id": 0, "device_id": 6, "mem_usage": 1340}, {"npu_id": 6, "chip_id": 1, "device_id": 7, "mem_usage": 1527}]}
{"cur_time": "2024-09-13 07:05:26", "npu_power_dissipation": [{"npu_id": 2, "power_dissipation": }, {"npu_id": 3, "power_dissipation": }, {"npu_id": 5, "power_dissipation": }, {"npu_id": 6, "power_dissipation": }], "device_mem_usage": [{"npu_id": 2, "chip_id": 0, "device_id": 0, "mem_usage": 20785/
|}, {"npu_id": 2, "chip_id": 1, "device_id": 1, "mem_usage": 1175}, {"npu_id": 3, "chip_id": 0, "device_id": 2, "mem_usage": 1819}, {"npu_id": 3, "chip_id": 1, "device_id": 3, "mem_usage": 1050}, {"npu_id": 5, "chip_id": 0, "device_id": 4, "mem_usage": 1485}, {"npu_id": 5, "chip_id": 1, "device_id": 5, "mem_usage": 1381}, {"npu_id": 6, "chip_id": 0, "device_id": 6, "mem_usage": 1340}, {"npu_id": 6, "chip_id": 1, "device_id": 7, "mem_usage": 1528}]}
{"cur_time": "2024-09-13 07:06:40", "npu_power_dissipation": [{"npu_id": 2, "power_dissipation": }, {"npu_id": 3, "power_dissipation": }, {"npu_id": 5, "power_dissipation": }, {"npu_id": 6, "power_dissipation": }], "device_mem_usage": [{"npu_id": 2, "chip_id": 0, "device_id": 0, "mem_usage": 20785/
|}, {"npu_id": 2, "chip_id": 1, "device_id": 1, "mem_usage": 1174}, {"npu_id": 3, "chip_id": 0, "device_id": 2, "mem_usage": 1819}, {"npu_id": 3, "chip_id": 1, "device_id": 3, "mem_usage": 1050}, {"npu_id": 5, "chip_id": 0, "device_id": 4, "mem_usage": 1485}, {"npu_id": 5, "chip_id": 1, "device_id": 5, "mem_usage": 1382}, {"npu_id": 6, "chip_id": 0, "device_id": 6, "mem_usage": 1340}, {"npu_id": 6, "chip_id": 1, "device_id": 7, "mem_usage": 1527}]}
{"cur_time": "2024-09-13 07:07:53", "npu_power_dissipation": [{"npu_id": 2, "power_dissipation": }, {"npu_id": 3, "power_dissipation": }, {"npu_id": 5, "power_dissipation": }, {"npu_id": 6, "power_dissipation": }], "device_mem_usage": [{"npu_id": 2, "chip_id": 0, "device_id": 0, "mem_usage": 20786/
|}, {"npu_id": 2, "chip_id": 1, "device_id": 1, "mem_usage": 1174}, {"npu_id": 3, "chip_id": 0, "device_id": 2, "mem_usage": 1820}, {"npu_id": 3, "chip_id": 1, "device_id": 3, "mem_usage": 1050}, {"npu_id": 5, "chip_id": 0, "device_id": 4, "mem_usage": 1485}, {"npu_id": 5, "chip_id": 1, "device_id": 5, "mem_usage": 1382}, {"npu_id": 6, "chip_id": 0, "device_id": 6, "mem_usage": 1340}, {"npu_id": 6, "chip_id": 1, "device_id": 7, "mem_usage": 1527}]}
{"cur_time": "2024-09-13 07:09:06", "npu_power_dissipation": [{"npu_id": 2, "power_dissipation": }, {"npu_id": 3, "power_dissipation": }, {"npu_id": 5, "power_dissipation": }, {"npu_id": 6, "power_dissipation": }], "device_mem_usage": [{"npu_id": 2, "chip_id": 0, "device_id": 0, "mem_usage": 20785/
|}, {"npu_id": 2, "chip_id": 1, "device_id": 1, "mem_usage": 1174}, {"npu_id": 3, "chip_id": 0, "device_id": 2, "mem_usage": 1819}, {"npu_id": 3, "chip_id": 1, "device_id": 3, "mem_usage": 1050}, {"npu_id": 5, "chip_id": 0, "device_id": 4, "mem_usage": 1485}, {"npu_id": 5, "chip_id": 1, "device_id": 5, "mem_usage": 1382}, {"npu_id": 6, "chip_id": 0, "device_id": 6, "mem_usage": 1340}, {"npu_id": 6, "chip_id": 1, "device_id": 7, "mem_usage": 1527}]}
{"cur_time": "2024-09-13 07:10:20", "npu_power_dissipation": [{"npu_id": 2, "power_dissipation": }, {"npu_id": 3, "power_dissipation": }, {"npu_id": 5, "power_dissipation": }, {"npu_id": 6, "power_dissipation": }], "device_mem_usage": [{"npu_id": 2, "chip_id": 0, "device_id": 0, "mem_usage": 20785/
|}, {"npu_id": 2, "chip_id": 1, "device_id": 1, "mem_usage": 1174}, {"npu_id": 3, "chip_id": 0, "device_id": 2, "mem_usage": 1820}, {"npu_id": 3, "chip_id": 1, "device_id": 3, "mem_usage": 1050}, {"npu_id": 5, "chip_id": 0, "device_id": 4, "mem_usage": 1485}, {"npu_id": 5, "chip_id": 1, "device_id": 5, "mem_usage": 1381}, {"npu_id": 6, "chip_id": 0, "device_id": 6, "mem_usage": 1340}, {"npu_id": 6, "chip_id": 1, "device_id": 7, "mem_usage": 1527}]}
{"cur_time": "2024-09-13 07:11:33", "npu_power_dissipation": [{"npu_id": 2, "power_dissipation": }, {"npu_id": 3, "power_dissipation": }, {"npu_id": 5, "power_dissipation": }, {"npu_id": 6, "power_dissipation": }], "device_mem_usage": [{"npu_id": 2, "chip_id": 0, "device_id": 0, "mem_usage": 20785/
|}, {"npu_id": 2, "chip_id": 1, "device_id": 1, "mem_usage": 1175}, {"npu_id": 3, "chip_id": 0, "device_id": 2, "mem_usage": 1819}, {"npu_id": 3, "chip_id": 1, "device_id": 3, "mem_usage": 1050}, {"npu_id": 5, "chip_id": 0, "device_id": 4, "mem_usage": 1485}, {"npu_id": 5, "chip_id": 1, "device_id": 5, "mem_usage": 1382}, {"npu_id": 6, "chip_id": 0, "device_id": 6, "mem_usage": 1340}, {"npu_id": 6, "chip_id": 1, "device_id": 7, "mem_usage": 1527}]}
{"cur_time": "2024-09-13 07:12:46", "npu_power_dissipation": [{"npu_id": 2, "power_dissipation": }, {"npu_id": 3, "power_dissipation": }, {"npu_id": 5, "power_dissipation": }, {"npu_id": 6, "power_dissipation": }], "device_mem_usage": [{"npu_id": 2, "chip_id": 0, "device_id": 0, "mem_usage": 20786/
|}, {"npu_id": 2, "chip_id": 1, "device_id": 1, "mem_usage": 1175}, {"npu_id": 3, "chip_id": 0, "device_id": 2, "mem_usage": 1819}, {"npu_id": 3, "chip_id": 1, "device_id": 3, "mem_usage": 1050}, {"npu_id": 5, "chip_id": 0, "device_id": 4, "mem_usage": 1485}, {"npu_id": 5, "chip_id": 1, "device_id": 5, "mem_usage": 1382}, {"npu_id": 6, "chip_id": 0, "device_id": 6, "mem_usage": 1341}, {"npu_id": 6, "chip_id": 1, "device_id": 7, "mem_usage": 1527}]}
{"cur_time": "2024-09-13 07:13:59", "npu_power_dissipation": [{"npu_id": 2, "power_dissipation": }, {"npu_id": 3, "power_dissipation": }, {"npu_id": 5, "power_dissipation": }, {"npu_id": 6, "power_dissipation": }], "device_mem_usage": [{"npu_id": 2, "chip_id": 0, "device_id": 0, "mem_usage": 20785/
|}, {"npu_id": 2, "chip_id": 1, "device_id": 1, "mem_usage": 1175}, {"npu_id": 3, "chip_id": 0, "device_id": 2, "mem_usage": 1820}, {"npu_id": 3, "chip_id": 1, "device_id": 3, "mem_usage": 1049}, {"npu_id": 5, "chip_id": 0, "device_id": 4, "mem_usage": 1484}, {"npu_id": 5, "chip_id": 1, "device_id": 5, "mem_usage": 1382}, {"npu_id": 6, "chip_id": 0, "device_id": 6, "mem_usage": 1340}, {"npu_id": 6, "chip_id": 1, "device_id": 7, "mem_usage": 1527}]}
{"cur_time": "2024-09-13 07:15:13", "npu_power_dissipation": [{"npu_id": 2, "power_dissipation": }, {"npu_id": 3, "power_dissipation": }, {"npu_id": 5, "power_dissipation": }, {"npu_id": 6, "power_dissipation": }], "device_mem_usage": [{"npu_id": 2, "chip_id": 0, "device_id": 0, "mem_usage": 20785/
|}, {"npu_id": 2, "chip_id": 1, "device_id": 1, "mem_usage": 1175}, {"npu_id": 3, "chip_id": 0, "device_id": 2, "mem_usage": 1819}, {"npu_id": 3, "chip_id": 1, "device_id": 3, "mem_usage": 1050}, {"npu_id": 5, "chip_id": 0, "device_id": 4, "mem_usage": 1485}, {"npu_id": 5, "chip_id": 1, "device_id": 5, "mem_usage": 1381}, {"npu_id": 6, "chip_id": 0, "device_id": 6, "mem_usage": 1340}, {"npu_id": 6, "chip_id": 1, "device_id": 7, "mem_usage": 1527}]}
{"cur_time": "2024-09-13 07:16:26", "npu_power_dissipation": [{"npu_id": 2, "power_dissipation": }, {"npu_id": 3, "power_dissipation": }, {"npu_id": 5, "power_dissipation": }, {"npu_id": 6, "power_dissipation": }], "device_mem_usage": [{"npu_id": 2, "chip_id": 0, "device_id": 0, "mem_usage": 20785/
|}, {"npu_id": 2, "chip_id": 1, "device_id": 1, "mem_usage": 1175}, {"npu_id": 3, "chip_id": 0, "device_id": 2, "mem_usage": 1819}, {"npu_id": 3, "chip_id": 1, "device_id": 3, "mem_usage": 1050}, {"npu_id": 5, "chip_id": 0, "device_id": 4, "mem_usage": 1485}, {"npu_id": 5, "chip_id": 1, "device_id": 5, "mem_usage": 1381}, {"npu_id": 6, "chip_id": 0, "device_id": 6, "mem_usage": 1341}, {"npu_id": 6, "chip_id": 1, "device_id": 7, "mem_usage": 1527}]}
{"cur_time": "2024-09-13 07:17:39", "npu_power_dissipation": [{"npu_id": 2, "power_dissipation": }, {"npu_id": 3, "power_dissipation": }, {"npu_id": 5, "power_dissipation": }, {"npu_id": 6, "power_dissipation": }], "device_mem_usage": [{"npu_id": 2, "chip_id": 0, "device_id": 0, "mem_usage": 20785/
|}, {"npu_id": 2, "chip_id": 1, "device_id": 1, "mem_usage": 1176}, {"npu_id": 3, "chip_id": 0, "device_id": 2, "mem_usage": 1819}, {"npu_id": 3, "chip_id": 1, "device_id": 3, "mem_usage": 1050}, {"npu_id": 5, "chip_id": 0, "device_id": 4, "mem_usage": 1486}, {"npu_id": 5, "chip_id": 1, "device_id": 5, "mem_usage": 1381}, {"npu_id": 6, "chip_id": 0, "device_id": 6, "mem_usage": 1340}, {"npu_id": 6, "chip_id": 1, "device_id": 7, "mem_usage": 1527}]}
{"cur_time": "2024-09-13 07:18:53", "npu_power_dissipation": [{"npu_id": 2, "power_dissipation": }, {"npu_id": 3, "power_dissipation": }, {"npu_id": 5, "power_dissipation": }, {"npu_id": 6, "power_dissipation": }], "device_mem_usage": [{"npu_id": 2, "chip_id": 0, "device_id": 0, "mem_usage": 20785/
|}, {"npu_id": 2, "chip_id": 1, "device_id": 1, "mem_usage": 1176}, {"npu_id": 3, "chip_id": 0, "device_id": 2, "mem_usage": 1820}, {"npu_id": 3, "chip_id": 1, "device_id": 3, "mem_usage": 1050}, {"npu_id": 5, "chip_id": 0, "device_id": 4, "mem_usage": 1486}, {"npu_id": 5, "chip_id": 1, "device_id": 5, "mem_usage": 1381}, {"npu_id": 6, "chip_id": 0, "device_id": 6, "mem_usage": 1340}, {"npu_id": 6, "chip_id": 1, "device_id": 7, "mem_usage": 1527}]}
{"cur_time": "2024-09-13 07:20:06", "npu_power_dissipation": [{"npu_id": 2, "power_dissipation": }, {"npu_id": 3, "power_dissipation": }, {"npu_id": 5, "power_dissipation": }, {"npu_id": 6, "power_dissipation": }], "device_mem_usage": [{"npu_id": 2, "chip_id": 0, "device_id": 0, "mem_usage": 20785/
|}, {"npu_id": 2, "chip_id": 1, "device_id": 1, "mem_usage": 1174}, {"npu_id": 3, "chip_id": 0, "device_id": 2, "mem_usage": 1819}, {"npu_id": 3, "chip_id": 1, "device_id": 3, "mem_usage": 1050}, {"npu_id": 5, "chip_id": 0, "device_id": 4, "mem_usage": 1486}, {"npu_id": 5, "chip_id": 1, "device_id": 5, "mem_usage": 1380}, {"npu_id": 6, "chip_id": 0, "device_id": 6, "mem_usage": 1340}, {"npu_id": 6, "chip_id": 1, "device_id": 7, "mem_usage": 1527}]}
{"cur_time": "2024-09-13 07:21:19", "npu_power_dissipation": [{"npu_id": 2, "power_dissipation": }, {"npu_id": 3, "power_dissipation": }, {"npu_id": 5, "power_dissipation": }, {"npu_id": 6, "power_dissipation": }], "device_mem_usage": [{"npu_id": 2, "chip_id": 0, "device_id": 0, "mem_usage": 20785/
|}, {"npu_id": 2, "chip_id": 1, "device_id": 1, "mem_usage": 1175}, {"npu_id": 3, "chip_id": 0, "device_id": 2, "mem_usage": 1819}, {"npu_id": 3, "chip_id": 1, "device_id": 3, "mem_usage": 1050}, {"npu_id": 5, "chip_id": 0, "device_id": 4, "mem_usage": 1486}, {"npu_id": 5, "chip_id": 1, "device_id": 5, "mem_usage": 1381}, {"npu_id": 6, "chip_id": 0, "device_id": 6, "mem_usage": 1340}, {"npu_id": 6, "chip_id": 1, "device_id": 7, "mem_usage": 1527}]}
{"cur_time": "2024-09-13 07:22:33", "npu_power_dissipation": [{"npu_id": 2, "power_dissipation": }, {"npu_id": 3, "power_dissipation": }, {"npu_id": 5, "power_dissipation": }, {"npu_id": 6, "power_dissipation": }], "device_mem_usage": [{"npu_id": 2, "chip_id": 0, "device_id": 0, "mem_usage": 20785/
|}, {"npu_id": 2, "chip_id": 1, "device_id": 1, "mem_usage": 1176}, {"npu_id": 3, "chip_id": 0, "device_id": 2, "mem_usage": 1820}, {"npu_id": 3, "chip_id": 1, "device_id": 3, "mem_usage": 1049}, {"npu_id": 5, "chip_id": 0, "device_id": 4, "mem_usage": 1485}, {"npu_id": 5, "chip_id": 1, "device_id": 5, "mem_usage": 1381}, {"npu_id": 6, "chip_id": 0, "device_id": 6, "mem_usage": 1340}, {"npu_id": 6, "chip_id": 1, "device_id": 7, "mem_usage": 1527}]}
{"cur_time": "2024-09-13 07:23:46", "npu_power_dissipation": [{"npu_id": 2, "power_dissipation": }, {"npu_id": 3, "power_dissipation": }, {"npu_id": 5, "power_dissipation": }, {"npu_id": 6, "power_dissipation": }], "device_mem_usage": [{"npu_id": 2, "chip_id": 0, "device_id": 0, "mem_usage": 20785/
|}, {"npu_id": 2, "chip_id": 1, "device_id": 1, "mem_usage": 1175}, {"npu_id": 3, "chip_id": 0, "device_id": 2, "mem_usage": 1820}, {"npu_id": 3, "chip_id": 1, "device_id": 3, "mem_usage": 1049}, {"npu_id": 5, "chip_id": 0, "device_id": 4, "mem_usage": 1486}, {"npu_id": 5, "chip_id": 1, "device_id": 5, "mem_usage": 1381}, {"npu_id": 6, "chip_id": 0, "device_id": 6, "mem_usage": 1340}, {"npu_id": 6, "chip_id": 1, "device_id": 7, "mem_usage": 1528}]}
{"cur_time": "2024-09-13 07:24:59", "npu_power_dissipation": [{"npu_id": 2, "power_dissipation": }, {"npu_id": 3, "power_dissipation": }, {"npu_id": 5, "power_dissipation": }, {"npu_id": 6, "power_dissipation": }], "device_mem_usage": [{"npu_id": 2, "chip_id": 0, "device_id": 0, "mem_usage": 20785/
|}, {"npu_id": 2, "chip_id": 1, "device_id": 1, "mem_usage": 1175}, {"npu_id": 3, "chip_id": 0, "device_id": 2, "mem_usage": 1820}, {"npu_id": 3, "chip_id": 1, "device_id": 3, "mem_usage": 1049}, {"npu_id": 5, "chip_id": 0, "device_id": 4, "mem_usage": 1486}, {"npu_id": 5, "chip_id": 1, "device_id": 5, "mem_usage": 1381}, {"npu_id": 6, "chip_id": 0, "device_id": 6, "mem_usage": 1341}, {"npu_id": 6, "chip_id": 1, "device_id": 7, "mem_usage": 1527}]}
{"cur_time": "2024-09-13 07:26:13", "npu_power_dissipation": [{"npu_id": 2, "power_dissipation": }, {"npu_id": 3, "power_dissipation": }, {"npu_id": 5, "power_dissipation": }, {"npu_id": 6, "power_dissipation": }], "device_mem_usage": [{"npu_id": 2, "chip_id": 0, "device_id": 0, "mem_usage": 20785/
|}, {"npu_id": 2, "chip_id": 1, "device_id": 1, "mem_usage": 1175}, {"npu_id": 3, "chip_id": 0, "device_id": 2, "mem_usage": 1820}, {"npu_id": 3, "chip_id": 1, "device_id": 3, "mem_usage": 1050}, {"npu_id": 5, "chip_id": 0, "device_id": 4, "mem_usage": 1486}, {"npu_id": 5, "chip_id": 1, "device_id": 5, "mem_usage": 1381}, {"npu_id": 6, "chip_id": 0, "device_id": 6, "mem_usage": 1341}, {"npu_id": 6, "chip_id": 1, "device_id": 7, "mem_usage": 1526}]}
{"cur_time": "2024-09-13 07:27:26", "npu_power_dissipation": [{"npu_id": 2, "power_dissipation": }, {"npu_id": 3, "power_dissipation": }, {"npu_id": 5, "power_dissipation": }, {"npu_id": 6, "power_dissipation": }], "device_mem_usage": [{"npu_id": 2, "chip_id": 0, "device_id": 0, "mem_usage": 20785/
|}, {"npu_id": 2, "chip_id": 1, "device_id": 1, "mem_usage": 1176}, {"npu_id": 3, "chip_id": 0, "device_id": 2, "mem_usage": 1819}, {"npu_id": 3, "chip_id": 1, "device_id": 3, "mem_usage": 1050}, {"npu_id": 5, "chip_id": 0, "device_id": 4, "mem_usage": 1486}, {"npu_id": 5, "chip_id": 1, "device_id": 5, "mem_usage": 1380}, {"npu_id": 6, "chip_id": 0, "device_id": 6, "mem_usage": 1341}, {"npu_id": 6, "chip_id": 1, "device_id": 7, "mem_usage": 1526}]}
{"cur_time": "2024-09-13 07:28:39", "npu_power_dissipation": [{"npu_id": 2, "power_dissipation": }, {"npu_id": 3, "power_dissipation": }, {"npu_id": 5, "power_dissipation": }, {"npu_id": 6, "power_dissipation": }], "device_mem_usage": [{"npu_id": 2, "chip_id": 0, "device_id": 0, "mem_usage": 20785/
|}, {"npu_id": 2, "chip_id": 1, "device_id": 1, "mem_usage": 1175}, {"npu_id": 3, "chip_id": 0, "device_id": 2, "mem_usage": 1820}, {"npu_id": 3, "chip_id": 1, "device_id": 3, "mem_usage": 1049}, {"npu_id": 5, "chip_id": 0, "device_id": 4, "mem_usage": 1486}, {"npu_id": 5, "chip_id": 1, "device_id": 5, "mem_usage": 1381}, {"npu_id": 6, "chip_id": 0, "device_id": 6, "mem_usage": 1341}, {"npu_id": 6, "chip_id": 1, "device_id": 7, "mem_usage": 1526}]}
{"cur_time": "2024-09-13 07:29:53", "npu_power_dissipation": [{"npu_id": 2, "power_dissipation": }, {"npu_id": 3, "power_dissipation": }, {"npu_id": 5, "power_dissipation": }, {"npu_id": 6, "power_dissipation": }], "device_mem_usage": [{"npu_id": 2, "chip_id": 0, "device_id": 0, "mem_usage": 20785/
|}, {"npu_id": 2, "chip_id": 1, "device_id": 1, "mem_usage": 1176}, {"npu_id": 3, "chip_id": 0, "device_id": 2, "mem_usage": 1819}, {"npu_id": 3, "chip_id": 1, "device_id": 3, "mem_usage": 1050}, {"npu_id": 5, "chip_id": 0, "device_id": 4, "mem_usage": 1486}, {"npu_id": 5, "chip_id": 1, "device_id": 5, "mem_usage": 1380}, {"npu_id": 6, "chip_id": 0, "device_id": 6, "mem_usage": 1341}, {"npu_id": 6, "chip_id": 1, "device_id": 7, "mem_usage": 1527}]}
{"cur_time": "2024-09-13 07:31:06", "npu_power_dissipation": [{"npu_id": 2, "power_dissipation": }, {"npu_id": 3, "power_dissipation": }, {"npu_id": 5, "power_dissipation": }, {"npu_id": 6, "power_dissipation": }], "device_mem_usage": [{"npu_id": 2, "chip_id": 0, "device_id": 0, "mem_usage": 20785/
|}, {"npu_id": 2, "chip_id": 1, "device_id": 1, "mem_usage": 1175}, {"npu_id": 3, "chip_id": 0, "device_id": 2, "mem_usage": 1819}, {"npu_id": 3, "chip_id": 1, "device_id": 3, "mem_usage": 1050}, {"npu_id": 5, "chip_id": 0, "device_id": 4, "mem_usage": 1486}, {"npu_id": 5, "chip_id": 1, "device_id": 5, "mem_usage": 1381}, {"npu_id": 6, "chip_id": 0, "device_id": 6, "mem_usage": 1341}, {"npu_id": 6, "chip_id": 1, "device_id": 7, "mem_usage": 1526}]}
{"cur_time": "2024-09-13 07:32:19", "npu_power_dissipation": [{"npu_id": 2, "power_dissipation": }, {"npu_id": 3, "power_dissipation": }, {"npu_id": 5, "power_dissipation": }, {"npu_id": 6, "power_dissipation": }], "device_mem_usage": [{"npu_id": 2, "chip_id": 0, "device_id": 0, "mem_usage": 20785/
|}, {"npu_id": 2, "chip_id": 1, "device_id": 1, "mem_usage": 1176}, {"npu_id": 3, "chip_id": 0, "device_id": 2, "mem_usage": 1819}, {"npu_id": 3, "chip_id": 1, "device_id": 3, "mem_usage": 1050}, {"npu_id": 5, "chip_id": 0, "device_id": 4, "mem_usage": 1486}, {"npu_id": 5, "chip_id": 1, "device_id": 5, "mem_usage": 1381}, {"npu_id": 6, "chip_id": 0, "device_id": 6, "mem_usage": 1341}, {"npu_id": 6, "chip_id": 1, "device_id": 7, "mem_usage": 1527}]}
{"cur_time": "2024-09-13 07:33:33", "npu_power_dissipation": [{"npu_id": 2, "power_dissipation": }, {"npu_id": 3, "power_dissipation": }, {"npu_id": 5, "power_dissipation": }, {"npu_id": 6, "power_dissipation": }], "device_mem_usage": [{"npu_id": 2, "chip_id": 0, "device_id": 0, "mem_usage": 20785/
|}, {"npu_id": 2, "chip_id": 1, "device_id": 1, "mem_usage": 1176}, {"npu_id": 3, "chip_id": 0, "device_id": 2, "mem_usage": 1819}, {"npu_id": 3, "chip_id": 1, "device_id": 3, "mem_usage": 1050}, {"npu_id": 5, "chip_id": 0, "device_id": 4, "mem_usage": 1485}, {"npu_id": 5, "chip_id": 1, "device_id": 5, "mem_usage": 1381}, {"npu_id": 6, "chip_id": 0, "device_id": 6, "mem_usage": 1341}, {"npu_id": 6, "chip_id": 1, "device_id": 7, "mem_usage": 1526}]}
{"cur_time": "2024-09-13 07:34:46", "npu_power_dissipation": [{"npu_id": 2, "power_dissipation": }, {"npu_id": 3, "power_dissipation": }, {"npu_id": 5, "power_dissipation": }, {"npu_id": 6, "power_dissipation": }], "device_mem_usage": [{"npu_id": 2, "chip_id": 0, "device_id": 0, "mem_usage": 20785/
|}, {"npu_id": 2, "chip_id": 1, "device_id": 1, "mem_usage": 1175}, {"npu_id": 3, "chip_id": 0, "device_id": 2, "mem_usage": 1819}, {"npu_id": 3, "chip_id": 1, "device_id": 3, "mem_usage": 1050}, {"npu_id": 5, "chip_id": 0, "device_id": 4, "mem_usage": 1486}, {"npu_id": 5, "chip_id": 1, "device_id": 5, "mem_usage": 1381}, {"npu_id": 6, "chip_id": 0, "device_id": 6, "mem_usage": 1341}, {"npu_id": 6, "chip_id": 1, "device_id": 7, "mem_usage": 1526}]}
{"cur_time": "2024-09-13 07:35:59", "npu_power_dissipation": [{"npu_id": 2, "power_dissipation": }, {"npu_id": 3, "power_dissipation": }, {"npu_id": 5, "power_dissipation": }, {"npu_id": 6, "power_dissipation": }], "device_mem_usage": [{"npu_id": 2, "chip_id": 0, "device_id": 0, "mem_usage": 20785/
|}, {"npu_id": 2, "chip_id": 1, "device_id": 1, "mem_usage": 1175}, {"npu_id": 3, "chip_id": 0, "device_id": 2, "mem_usage": 1820}, {"npu_id": 3, "chip_id": 1, "device_id": 3, "mem_usage": 1049}, {"npu_id": 5, "chip_id": 0, "device_id": 4, "mem_usage": 1485}, {"npu_id": 5, "chip_id": 1, "device_id": 5, "mem_usage": 1381}, {"npu_id": 6, "chip_id": 0, "device_id": 6, "mem_usage": 1341}, {"npu_id": 6, "chip_id": 1, "device_id": 7, "mem_usage": 1527}]}
{"cur_time": "2024-09-13 07:37:13", "npu_power_dissipation": [{"npu_id": 2, "power_dissipation": }, {"npu_id": 3, "power_dissipation": }, {"npu_id": 5, "power_dissipation": }, {"npu_id": 6, "power_dissipation": }], "device_mem_usage": [{"npu_id": 2, "chip_id": 0, "device_id": 0, "mem_usage": 20785/
|}, {"npu_id": 2, "chip_id": 1, "device_id": 1, "mem_usage": 1176}, {"npu_id": 3, "chip_id": 0, "device_id": 2, "mem_usage": 1819}, {"npu_id": 3, "chip_id": 1, "device_id": 3, "mem_usage": 1050}, {"npu_id": 5, "chip_id": 0, "device_id": 4, "mem_usage": 1486}, {"npu_id": 5, "chip_id": 1, "device_id": 5, "mem_usage": 1380}, {"npu_id": 6, "chip_id": 0, "device_id": 6, "mem_usage": 1341}, {"npu_id": 6, "chip_id": 1, "device_id": 7, "mem_usage": 1526}]}
{"cur_time": "2024-09-13 07:38:26", "npu_power_dissipation": [{"npu_id": 2, "power_dissipation": }, {"npu_id": 3, "power_dissipation": }, {"npu_id": 5, "power_dissipation": }, {"npu_id": 6, "power_dissipation": }], "device_mem_usage": [{"npu_id": 2, "chip_id": 0, "device_id": 0, "mem_usage": 20785/
|}, {"npu_id": 2, "chip_id": 1, "device_id": 1, "mem_usage": 1176}, {"npu_id": 3, "chip_id": 0, "device_id": 2, "mem_usage": 1820}, {"npu_id": 3, "chip_id": 1, "device_id": 3, "mem_usage": 1050}, {"npu_id": 5, "chip_id": 0, "device_id": 4, "mem_usage": 1486}, {"npu_id": 5, "chip_id": 1, "device_id": 5, "mem_usage": 1380}, {"npu_id": 6, "chip_id": 0, "device_id": 6, "mem_usage": 1341}, {"npu_id": 6, "chip_id": 1, "device_id": 7, "mem_usage": 1526}]}
{"cur_time": "2024-09-13 07:39:39", "npu_power_dissipation": [{"npu_id": 2, "power_dissipation": }, {"npu_id": 3, "power_dissipation": }, {"npu_id": 5, "power_dissipation": }, {"npu_id": 6, "power_dissipation": }], "device_mem_usage": [{"npu_id": 2, "chip_id": 0, "device_id": 0, "mem_usage": 20785/
|}, {"npu_id": 2, "chip_id": 1, "device_id": 1, "mem_usage": 1175}, {"npu_id": 3, "chip_id": 0, "device_id": 2, "mem_usage": 1820}, {"npu_id": 3, "chip_id": 1, "device_id": 3, "mem_usage": 1050}, {"npu_id": 5, "chip_id": 0, "device_id": 4, "mem_usage": 1486}, {"npu_id": 5, "chip_id": 1, "device_id": 5, "mem_usage": 1380}, {"npu_id": 6, "chip_id": 0, "device_id": 6, "mem_usage": 1341}, {"npu_id": 6, "chip_id": 1, "device_id": 7, "mem_usage": 1527}]}
{"cur_time": "2024-09-13 07:40:53", "npu_power_dissipation": [{"npu_id": 2, "power_dissipation": }, {"npu_id": 3, "power_dissipation": }, {"npu_id": 5, "power_dissipation": }, {"npu_id": 6, "power_dissipation": }], "device_mem_usage": [{"npu_id": 2, "chip_id": 0, "device_id": 0, "mem_usage": 20785/
|}, {"npu_id": 2, "chip_id": 1, "device_id": 1, "mem_usage": 1176}, {"npu_id": 3, "chip_id": 0, "device_id": 2, "mem_usage": 1820}, {"npu_id": 3, "chip_id": 1, "device_id": 3, "mem_usage": 1050}, {"npu_id": 5, "chip_id": 0, "device_id": 4, "mem_usage": 1486}, {"npu_id": 5, "chip_id": 1, "device_id": 5, "mem_usage": 1380}, {"npu_id": 6, "chip_id": 0, "device_id": 6, "mem_usage": 1340}, {"npu_id": 6, "chip_id": 1, "device_id": 7, "mem_usage": 1527}]}
{"cur_time": "2024-09-13 07:42:06", "npu_power_dissipation": [{"npu_id": 2, "power_dissipation": }, {"npu_id": 3, "power_dissipation": }, {"npu_id": 5, "power_dissipation": }, {"npu_id": 6, "power_dissipation": }], "device_mem_usage": [{"npu_id": 2, "chip_id": 0, "device_id": 0, "mem_usage": 20784/
|}, {"npu_id": 2, "chip_id": 1, "device_id": 1, "mem_usage": 1176}, {"npu_id": 3, "chip_id": 0, "device_id": 2, "mem_usage": 1819}, {"npu_id": 3, "chip_id": 1, "device_id": 3, "mem_usage": 1050}, {"npu_id": 5, "chip_id": 0, "device_id": 4, "mem_usage": 1486}, {"npu_id": 5, "chip_id": 1, "device_id": 5, "mem_usage": 1381}, {"npu_id": 6, "chip_id": 0, "device_id": 6, "mem_usage": 1342}, {"npu_id": 6, "chip_id": 1, "device_id": 7, "mem_usage": 1526}]}
{"cur_time": "2024-09-13 07:43:19", "npu_power_dissipation": [{"npu_id": 2, "power_dissipation": }, {"npu_id": 3, "power_dissipation": }, {"npu_id": 5, "power_dissipation": }, {"npu_id": 6, "power_dissipation": }], "device_mem_usage": [{"npu_id": 2, "chip_id": 0, "device_id": 0, "mem_usage": 20785/
|}, {"npu_id": 2, "chip_id": 1, "device_id": 1, "mem_usage": 1175}, {"npu_id": 3, "chip_id": 0, "device_id": 2, "mem_usage": 1820}, {"npu_id": 3, "chip_id": 1, "device_id": 3, "mem_usage": 1050}, {"npu_id": 5, "chip_id": 0, "device_id": 4, "mem_usage": 1486}, {"npu_id": 5, "chip_id": 1, "device_id": 5, "mem_usage": 1381}, {"npu_id": 6, "chip_id": 0, "device_id": 6, "mem_usage": 1341}, {"npu_id": 6, "chip_id": 1, "device_id": 7, "mem_usage": 1526}]}
{"cur_time": "2024-09-13 07:44:32", "npu_power_dissipation": [{"npu_id": 2, "power_dissipation": }, {"npu_id": 3, "power_dissipation": }, {"npu_id": 5, "power_dissipation": }, {"npu_id": 6, "power_dissipation": }], "device_mem_usage": [{"npu_id": 2, "chip_id": 0, "device_id": 0, "mem_usage": 20785/
|}, {"npu_id": 2, "chip_id": 1, "device_id": 1, "mem_usage": 1176}, {"npu_id": 3, "chip_id": 0, "device_id": 2, "mem_usage": 1819}, {"npu_id": 3, "chip_id": 1, "device_id": 3, "mem_usage": 1050}, {"npu_id": 5, "chip_id": 0, "device_id": 4, "mem_usage": 1486}, {"npu_id": 5, "chip_id": 1, "device_id": 5, "mem_usage": 1381}, {"npu_id": 6, "chip_id": 0, "device_id": 6, "mem_usage": 1341}, {"npu_id": 6, "chip_id": 1, "device_id": 7, "mem_usage": 1526}]}
{"cur_time": "2024-09-13 07:45:46", "npu_power_dissipation": [{"npu_id": 2, "power_dissipation": }, {"npu_id": 3, "power_dissipation": }, {"npu_id": 5, "power_dissipation": }, {"npu_id": 6, "power_dissipation": }], "device_mem_usage": [{"npu_id": 2, "chip_id": 0, "device_id": 0, "mem_usage": 20785/
|}, {"npu_id": 2, "chip_id": 1, "device_id": 1, "mem_usage": 1176}, {"npu_id": 3, "chip_id": 0, "device_id": 2, "mem_usage": 1820}, {"npu_id": 3, "chip_id": 1, "device_id": 3, "mem_usage": 1050}, {"npu_id": 5, "chip_id": 0, "device_id": 4, "mem_usage": 1486}, {"npu_id": 5, "chip_id": 1, "device_id": 5, "mem_usage": 1381}, {"npu_id": 6, "chip_id": 0, "device_id": 6, "mem_usage": 1341}, {"npu_id": 6, "chip_id": 1, "device_id": 7, "mem_usage": 1526}]}
{"cur_time": "2024-09-13 07:46:59", "npu_power_dissipation": [{"npu_id": 2, "power_dissipation": }, {"npu_id": 3, "power_dissipation": }, {"npu_id": 5, "power_dissipation": }, {"npu_id": 6, "power_dissipation": }], "device_mem_usage": [{"npu_id": 2, "chip_id": 0, "device_id": 0, "mem_usage": 20785/
|}, {"npu_id": 2, "chip_id": 1, "device_id": 1, "mem_usage": 1176}, {"npu_id": 3, "chip_id": 0, "device_id": 2, "mem_usage": 1820}, {"npu_id": 3, "chip_id": 1, "device_id": 3, "mem_usage": 1049}, {"npu_id": 5, "chip_id": 0, "device_id": 4, "mem_usage": 1486}, {"npu_id": 5, "chip_id": 1, "device_id": 5, "mem_usage": 1381}, {"npu_id": 6, "chip_id": 0, "device_id": 6, "mem_usage": 1341}, {"npu_id": 6, "chip_id": 1, "device_id": 7, "mem_usage": 1526}]}
{"cur_time": "2024-09-13 07:48:13", "npu_power_dissipation": [{"npu_id": 2, "power_dissipation": }, {"npu_id": 3, "power_dissipation": }, {"npu_id": 5, "power_dissipation": }, {"npu_id": 6, "power_dissipation": }], "device_mem_usage": [{"npu_id": 2, "chip_id": 0, "device_id": 0, "mem_usage": 20785/
|}, {"npu_id": 2, "chip_id": 1, "device_id": 1, "mem_usage": 1176}, {"npu_id": 3, "chip_id": 0, "device_id": 2, "mem_usage": 1819}, {"npu_id": 3, "chip_id": 1, "device_id": 3, "mem_usage": 1050}, {"npu_id": 5, "chip_id": 0, "device_id": 4, "mem_usage": 1486}, {"npu_id": 5, "chip_id": 1, "device_id": 5, "mem_usage": 1381}, {"npu_id": 6, "chip_id": 0, "device_id": 6, "mem_usage": 1340}, {"npu_id": 6, "chip_id": 1, "device_id": 7, "mem_usage": 1527}]}
{"cur_time": "2024-09-13 07:49:26", "npu_power_dissipation": [{"npu_id": 2, "power_dissipation": }, {"npu_id": 3, "power_dissipation": }, {"npu_id": 5, "power_dissipation": }, {"npu_id": 6, "power_dissipation": }], "device_mem_usage": [{"npu_id": 2, "chip_id": 0, "device_id": 0, "mem_usage": 20785/
|}, {"npu_id": 2, "chip_id": 1, "device_id": 1, "mem_usage": 1175}, {"npu_id": 3, "chip_id": 0, "device_id": 2, "mem_usage": 1819}, {"npu_id": 3, "chip_id": 1, "device_id": 3, "mem_usage": 1050}, {"npu_id": 5, "chip_id": 0, "device_id": 4, "mem_usage": 1486}, {"npu_id": 5, "chip_id": 1, "device_id": 5, "mem_usage": 1381}, {"npu_id": 6, "chip_id": 0, "device_id": 6, "mem_usage": 1340}, {"npu_id": 6, "chip_id": 1, "device_id": 7, "mem_usage": 1527}]}
{"cur_time": "2024-09-13 07:50:39", "npu_power_dissipation": [{"npu_id": 2, "power_dissipation": }, {"npu_id": 3, "power_dissipation": }, {"npu_id": 5, "power_dissipation": }, {"npu_id": 6, "power_dissipation": }], "device_mem_usage": [{"npu_id": 2, "chip_id": 0, "device_id": 0, "mem_usage": 20785/
|}, {"npu_id": 2, "chip_id": 1, "device_id": 1, "mem_usage": 1175}, {"npu_id": 3, "chip_id": 0, "device_id": 2, "mem_usage": 1820}, {"npu_id": 3, "chip_id": 1, "device_id": 3, "mem_usage": 1049}, {"npu_id": 5, "chip_id": 0, "device_id": 4, "mem_usage": 1486}, {"npu_id": 5, "chip_id": 1, "device_id": 5, "mem_usage": 1381}, {"npu_id": 6, "chip_id": 0, "device_id": 6, "mem_usage": 1340}, {"npu_id": 6, "chip_id": 1, "device_id": 7, "mem_usage": 1527}]}
{"cur_time": "2024-09-13 07:51:53", "npu_power_dissipation": [{"npu_id": 2, "power_dissipation": }, {"npu_id": 3, "power_dissipation": }, {"npu_id": 5, "power_dissipation": }, {"npu_id": 6, "power_dissipation": }], "device_mem_usage": [{"npu_id": 2, "chip_id": 0, "device_id": 0, "mem_usage": 20785/
|}, {"npu_id": 2, "chip_id": 1, "device_id": 1, "mem_usage": 1176}, {"npu_id": 3, "chip_id": 0, "device_id": 2, "mem_usage": 1819}, {"npu_id": 3, "chip_id": 1, "device_id": 3, "mem_usage": 1050}, {"npu_id": 5, "chip_id": 0, "device_id": 4, "mem_usage": 1485}, {"npu_id": 5, "chip_id": 1, "device_id": 5, "mem_usage": 1381}, {"npu_id": 6, "chip_id": 0, "device_id": 6, "mem_usage": 1340}, {"npu_id": 6, "chip_id": 1, "device_id": 7, "mem_usage": 1527}]}
{"cur_time": "2024-09-13 07:53:06", "npu_power_dissipation": [{"npu_id": 2, "power_dissipation": }, {"npu_id": 3, "power_dissipation": }, {"npu_id": 5, "power_dissipation": }, {"npu_id": 6, "power_dissipation": }], "device_mem_usage": [{"npu_id": 2, "chip_id": 0, "device_id": 0, "mem_usage": 20785/
|}, {"npu_id": 2, "chip_id": 1, "device_id": 1, "mem_usage": 1175}, {"npu_id": 3, "chip_id": 0, "device_id": 2, "mem_usage": 1819}, {"npu_id": 3, "chip_id": 1, "device_id": 3, "mem_usage": 1050}, {"npu_id": 5, "chip_id": 0, "device_id": 4, "mem_usage": 1486}, {"npu_id": 5, "chip_id": 1, "device_id": 5, "mem_usage": 1381}, {"npu_id": 6, "chip_id": 0, "device_id": 6, "mem_usage": 1340}, {"npu_id": 6, "chip_id": 1, "device_id": 7, "mem_usage": 1527}]}
{"cur_time": "2024-09-13 07:54:19", "npu_power_dissipation": [{"npu_id": 2, "power_dissipation": }, {"npu_id": 3, "power_dissipation": }, {"npu_id": 5, "power_dissipation": }, {"npu_id": 6, "power_dissipation": }], "device_mem_usage": [{"npu_id": 2, "chip_id": 0, "device_id": 0, "mem_usage": 20786/
|}, {"npu_id": 2, "chip_id": 1, "device_id": 1, "mem_usage": 1176}, {"npu_id": 3, "chip_id": 0, "device_id": 2, "mem_usage": 1819}, {"npu_id": 3, "chip_id": 1, "device_id": 3, "mem_usage": 1050}, {"npu_id": 5, "chip_id": 0, "device_id": 4, "mem_usage": 1486}, {"npu_id": 5, "chip_id": 1, "device_id": 5, "mem_usage": 1381}, {"npu_id": 6, "chip_id": 0, "device_id": 6, "mem_usage": 1340}, {"npu_id": 6, "chip_id": 1, "device_id": 7, "mem_usage": 1527}]}
{"cur_time": "2024-09-13 07:55:33", "npu_power_dissipation": [{"npu_id": 2, "power_dissipation": }, {"npu_id": 3, "power_dissipation": }, {"npu_id": 5, "power_dissipation": }, {"npu_id": 6, "power_dissipation": }], "device_mem_usage": [{"npu_id": 2, "chip_id": 0, "device_id": 0, "mem_usage": 20785/
|}, {"npu_id": 2, "chip_id": 1, "device_id": 1, "mem_usage": 1176}, {"npu_id": 3, "chip_id": 0, "device_id": 2, "mem_usage": 1819}, {"npu_id": 3, "chip_id": 1, "device_id": 3, "mem_usage": 1049}, {"npu_id": 5, "chip_id": 0, "device_id": 4, "mem_usage": 1486}, {"npu_id": 5, "chip_id": 1, "device_id": 5, "mem_usage": 1380}, {"npu_id": 6, "chip_id": 0, "device_id": 6, "mem_usage": 1340}, {"npu_id": 6, "chip_id": 1, "device_id": 7, "mem_usage": 1527}]}
{"cur_time": "2024-09-13 07:56:46", "npu_power_dissipation": [{"npu_id": 2, "power_dissipation": }, {"npu_id": 3, "power_dissipation": }, {"npu_id": 5, "power_dissipation": }, {"npu_id": 6, "power_dissipation": }], "device_mem_usage": [{"npu_id": 2, "chip_id": 0, "device_id": 0, "mem_usage": 20785/
|}, {"npu_id": 2, "chip_id": 1, "device_id": 1, "mem_usage": 1175}, {"npu_id": 3, "chip_id": 0, "device_id": 2, "mem_usage": 1820}, {"npu_id": 3, "chip_id": 1, "device_id": 3, "mem_usage": 1050}, {"npu_id": 5, "chip_id": 0, "device_id": 4, "mem_usage": 1486}, {"npu_id": 5, "chip_id": 1, "device_id": 5, "mem_usage": 1381}, {"npu_id": 6, "chip_id": 0, "device_id": 6, "mem_usage": 1340}, {"npu_id": 6, "chip_id": 1, "device_id": 7, "mem_usage": 1528}]}
{"cur_time": "2024-09-13 07:57:58", "npu_power_dissipation": [{"npu_id": 2, "power_dissipation": }, {"npu_id": 3, "power_dissipation": }, {"npu_id": 5, "power_dissipation": }, {"npu_id": 6, "power_dissipation": }], "device_mem_usage": [{"npu_id": 2, "chip_id": 0, "device_id": 0, "mem_usage": 20786/
|}, {"npu_id": 2, "chip_id": 1, "device_id": 1, "mem_usage": 1175}, {"npu_id": 3, "chip_id": 0, "device_id": 2, "mem_usage": 1820}, {"npu_id": 3, "chip_id": 1, "device_id": 3, "mem_usage": 1050}, {"npu_id": 5, "chip_id": 0, "device_id": 4, "mem_usage": 1485}, {"npu_id": 5, "chip_id": 1, "device_id": 5, "mem_usage": 1381}, {"npu_id": 6, "chip_id": 0, "device_id": 6, "mem_usage": 1340}, {"npu_id": 6, "chip_id": 1, "device_id": 7, "mem_usage": 1527}]}
{"cur_time": "2024-09-13 07:59:11", "npu_power_dissipation": [{"npu_id": 2, "power_dissipation": }, {"npu_id": 3, "power_dissipation": }, {"npu_id": 5, "power_dissipation": }, {"npu_id": 6, "power_dissipation": }], "device_mem_usage": [{"npu_id": 2, "chip_id": 0, "device_id": 0, "mem_usage": 20785/
|}, {"npu_id": 2, "chip_id": 1, "device_id": 1, "mem_usage": 1175}, {"npu_id": 3, "chip_id": 0, "device_id": 2, "mem_usage": 1820}, {"npu_id": 3, "chip_id": 1, "device_id": 3, "mem_usage": 1049}, {"npu_id": 5, "chip_id": 0, "device_id": 4, "mem_usage": 1486}, {"npu_id": 5, "chip_id": 1, "device_id": 5, "mem_usage": 1381}, {"npu_id": 6, "chip_id": 0, "device_id": 6, "mem_usage": 1340}, {"npu_id": 6, "chip_id": 1, "device_id": 7, "mem_usage": 1527}]}
{"cur_time": "2024-09-13 08:00:24", "npu_power_dissipation": [{"npu_id": 2, "power_dissipation": }, {"npu_id": 3, "power_dissipation": }, {"npu_id": 5, "power_dissipation": }, {"npu_id": 6, "power_dissipation": }], "device_mem_usage": [{"npu_id": 2, "chip_id": 0, "device_id": 0, "mem_usage": 20785/
|}, {"npu_id": 2, "chip_id": 1, "device_id": 1, "mem_usage": 1175}, {"npu_id": 3, "chip_id": 0, "device_id": 2, "mem_usage": 1819}, {"npu_id": 3, "chip_id": 1, "device_id": 3, "mem_usage": 1050}, {"npu_id": 5, "chip_id": 0, "device_id": 4, "mem_usage": 1486}, {"npu_id": 5, "chip_id": 1, "device_id": 5, "mem_usage": 1381}, {"npu_id": 6, "chip_id": 0, "device_id": 6, "mem_usage": 1340}, {"npu_id": 6, "chip_id": 1, "device_id": 7, "mem_usage": 1527}]}
{"cur_time": "2024-09-13 08:01:37", "npu_power_dissipation": [{"npu_id": 2, "power_dissipation": }, {"npu_id": 3, "power_dissipation": }, {"npu_id": 5, "power_dissipation": }, {"npu_id": 6, "power_dissipation": }], "device_mem_usage": [{"npu_id": 2, "chip_id": 0, "device_id": 0, "mem_usage": 20786/
|}, {"npu_id": 2, "chip_id": 1, "device_id": 1, "mem_usage": 1175}, {"npu_id": 3, "chip_id": 0, "device_id": 2, "mem_usage": 1819}, {"npu_id": 3, "chip_id": 1, "device_id": 3, "mem_usage": 1050}, {"npu_id": 5, "chip_id": 0, "device_id": 4, "mem_usage": 1486}, {"npu_id": 5, "chip_id": 1, "device_id": 5, "mem_usage": 1380}, {"npu_id": 6, "chip_id": 0, "device_id": 6, "mem_usage": 1341}, {"npu_id": 6, "chip_id": 1, "device_id": 7, "mem_usage": 1527}]}
{"cur_time": "2024-09-13 08:02:49", "npu_power_dissipation": [{"npu_id": 2, "power_dissipation": }, {"npu_id": 3, "power_dissipation": }, {"npu_id": 5, "power_dissipation": }, {"npu_id": 6, "power_dissipation": }], "device_mem_usage": [{"npu_id": 2, "chip_id": 0, "device_id": 0, "mem_usage": 20786/
|}, {"npu_id": 2, "chip_id": 1, "device_id": 1, "mem_usage": 1174}, {"npu_id": 3, "chip_id": 0, "device_id": 2, "mem_usage": 1820}, {"npu_id": 3, "chip_id": 1, "device_id": 3, "mem_usage": 1050}, {"npu_id": 5, "chip_id": 0, "device_id": 4, "mem_usage": 1486}, {"npu_id": 5, "chip_id": 1, "device_id": 5, "mem_usage": 1380}, {"npu_id": 6, "chip_id": 0, "device_id": 6, "mem_usage": 1341}, {"npu_id": 6, "chip_id": 1, "device_id": 7, "mem_usage": 1526}]}
{"cur_time": "2024-09-13 08:04:02", "npu_power_dissipation": [{"npu_id": 2, "power_dissipation": }, {"npu_id": 3, "power_dissipation": }, {"npu_id": 5, "power_dissipation": }, {"npu_id": 6, "power_dissipation": }], "device_mem_usage": [{"npu_id": 2, "chip_id": 0, "device_id": 0, "mem_usage": 20786/
|}, {"npu_id": 2, "chip_id": 1, "device_id": 1, "mem_usage": 1174}, {"npu_id": 3, "chip_id": 0, "device_id": 2, "mem_usage": 1820}, {"npu_id": 3, "chip_id": 1, "device_id": 3, "mem_usage": 1049}, {"npu_id": 5, "chip_id": 0, "device_id": 4, "mem_usage": 1486}, {"npu_id": 5, "chip_id": 1, "device_id": 5, "mem_usage": 1380}, {"npu_id": 6, "chip_id": 0, "device_id": 6, "mem_usage": 1340}, {"npu_id": 6, "chip_id": 1, "device_id": 7, "mem_usage": 1527}]}

View File

@ -1,14 +0,0 @@
cutoff_len: 1024
ddp_timeout: 180000000
do_predict: true
eval_dataset: alpaca_gpt4_zh
include_tokens_per_second: true
max_samples: 50
model_name_or_path: ms_cache/hub/baichuan-inc/Baichuan2-7B-Base
output_dir: ./results/inference_Baichuan2-7B_8_gpu_50_step_20240913061013
overwrite_cache: true
overwrite_output_dir: true
per_device_eval_batch_size: 2
predict_with_generate: true
preprocessing_num_workers: 16
template: baichuan2

View File

@ -1,20 +0,0 @@
09/13/2024 06:10:36 - INFO - llamafactory.hparams.parser - Process rank: 0, device: npu:0, n_gpu: 1, distributed training: False, compute dtype: None
09/13/2024 06:10:36 - INFO - llamafactory.data.loader - Loading dataset llamafactory/alpaca_gpt4_zh...
eval example:
input_ids:
[195, 2848, 11450, 4755, 4434, 66, 196]
inputs:
<reserved_106>保持健康的三个提示。<reserved_107>
09/13/2024 06:10:46 - INFO - llamafactory.model.patcher - Using KV cache for faster generation.
09/13/2024 06:12:15 - INFO - llamafactory.model.model_utils.attention - Using vanilla attention implementation.
09/13/2024 06:12:15 - INFO - llamafactory.model.loader - all params: 7,505,973,248
***** predict metrics *****
predict_bleu-4 = 1.71
predict_model_preparation_time = 0.0123
predict_rouge-1 = 13.3848
predict_rouge-2 = 2.7077
predict_rouge-l = 6.7511
predict_runtime = 1:51:56.35
predict_samples_per_second = 0.007
predict_steps_per_second = 0.004
09/13/2024 08:04:11 - INFO - llamafactory.train.sft.trainer - Saving prediction results to ./results/inference_Baichuan2-7B_8_gpu_50_step_20240913061013/generated_predictions.jsonl

View File

@ -1,10 +0,0 @@
{
"predict_bleu-4": 1.7100479999999998,
"predict_model_preparation_time": 0.0123,
"predict_rouge-1": 13.384794,
"predict_rouge-2": 2.707682,
"predict_rouge-l": 6.751074,
"predict_runtime": 6716.3545,
"predict_samples_per_second": 0.007,
"predict_steps_per_second": 0.004
}

View File

@ -1,5 +0,0 @@
{"current_steps": 5, "total_steps": 25, "percentage": 20.0, "elapsed_time": "0:14:07", "remaining_time": "0:56:29"}
{"current_steps": 10, "total_steps": 25, "percentage": 40.0, "elapsed_time": "0:34:52", "remaining_time": "0:52:19"}
{"current_steps": 15, "total_steps": 25, "percentage": 60.0, "elapsed_time": "0:55:04", "remaining_time": "0:36:43"}
{"current_steps": 20, "total_steps": 25, "percentage": 80.0, "elapsed_time": "1:23:03", "remaining_time": "0:20:45"}
{"current_steps": 25, "total_steps": 25, "percentage": 100.0, "elapsed_time": "1:46:10", "remaining_time": "0:00:00"}

View File

@ -1,32 +0,0 @@
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": null,
"eval_steps": 500,
"global_step": 0,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [],
"logging_steps": 500,
"max_steps": 0,
"num_input_tokens_seen": 0,
"num_train_epochs": 0,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": false,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 0,
"train_batch_size": null,
"trial_name": null,
"trial_params": null
}

View File

@ -1,31 +0,0 @@
bf16: true
cutoff_len: 1024
dataset: belle_1m
ddp_timeout: 180000000
do_train: true
eval_steps: 500
eval_strategy: steps
finetuning_type: lora
gradient_accumulation_steps: 8
include_num_input_tokens_seen: true
include_tokens_per_second: true
learning_rate: 0.0001
logging_steps: 3
lora_target: all
lr_scheduler_type: cosine
max_samples: 10000
max_steps: '50'
model_name_or_path: ../../models/Qwen-7B
num_train_epochs: 10.0
output_dir: ./results/lora_sft_Qwen-7B_4_gpu_50_step_20240905070656
overwrite_cache: true
overwrite_output_dir: true
per_device_eval_batch_size: 2
per_device_train_batch_size: 2
plot_loss: true
preprocessing_num_workers: 16
save_steps: 500
stage: sft
template: qwen
val_size: 0.1
warmup_ratio: 0.1

View File

@ -25,7 +25,7 @@ overwrite_output_dir: true
### train
per_device_train_batch_size: 2
gradient_accumulation_steps: 8
learning_rate: 1.0e-4
learning_rate: 1.0e-5
num_train_epochs: 10.0
lr_scheduler_type: cosine
warmup_ratio: 0.1

View File

@ -11,6 +11,7 @@ cutoff_len: 1024
max_samples: 50
overwrite_cache: true
preprocessing_num_workers: 16
include_num_input_tokens_seen: true
include_tokens_per_second: true
### output

View File

@ -1,10 +1,19 @@
#!/bin/bash
# bash run_once.sh lora_sft Qwen-7B 8 50
# bash run_once.sh lora_sft Qwen-7B 1 50
# bash run_once.sh inference Qwen-7B 8 50
run_type="$1"
model="$2"
gpu_cnt="$3"
max_steps="$4"
echo "run_type: ${run_type}"
echo "model: ${model}"
echo "gpu_cnt: ${gpu_cnt}"
echo "max_steps: ${max_steps}"
current_datetime=$(date +%Y%m%d%H%M%S)
if [ "${run_type}"="lora_sft" ]; then
@ -14,7 +23,7 @@ else
run_name="${run_type}_${model}_${gpu_cnt}_gpu_${current_datetime}"
fi
output_dir="./results/${run_name}"
output_dir="./results/910b/${run_name}"
if [ ! -d "$output_dir" ]; then
mkdir -p "$output_dir"
@ -31,21 +40,30 @@ export USE_MODELSCOPE_HUB=1
echo "Start recording npu status "
bash npu_status.sh ${output_dir} 60 0 &
npu_status_pid=$!
echo "${npu_status_pid}"
# echo "${npu_status_pid}"
if [ "${gpu_cnt}"="1" ]; then
ASCEND_RT_VISIBLE_DEVICES=0 llamafactory-cli train ${output_dir}/${run_name}.yaml | tee "${output_dir}/log.txt" &
if [ "${gpu_cnt}" = "1" ]; then
ASCEND_RT_VISIBLE_DEVICES=0 llamafactory-cli train ${output_dir}/${run_name}.yaml 2>&1 | tee -i "${output_dir}/log.txt" &
train_pid=$!
echo "Start train"
echo "Start single npu train"
else
FORCE_TORCHRUN=1 llamafactory-cli train ${output_dir}/${run_name}.yaml | tee "${output_dir}/log.txt" &
FORCE_TORCHRUN=1 llamafactory-cli train ${output_dir}/${run_name}.yaml 2>&1 | tee -i "${output_dir}/log.txt" &
train_pid=$!
echo "Start train"
echo "Start multi npu train"
fi
wait $train_pid
train_status=$?
echo "Train ended"
# sleep 60
sleep 60
kill $npu_status_pid
echo "Npu status ended"
echo "train_status ${train_status}"
if [ $train_status -ne 0 ]; then
output_dir="${output_dir}_fail"
fi
python3 -c "import moxing as mox; mox.file.copy_parallel('${output_dir}', 'obs://xty/results/${run_name}')"
rm -r ${output_dir}

169
sort_result.py Normal file
View File

@ -0,0 +1,169 @@
import os
import time
import json
# When True only the per-configuration average is reported; when False every
# run is listed and the average is appended after them.
AVG_ONLY = False
# Kind of run to summarise: "lora_sft" reads train_* metrics, any other value
# reads predict_* metrics.
TRAIN_TYPE = "inference"
# Directory containing one sub-directory per benchmark run.
RESULTS_PATH = r"C:\Users\wengq\Desktop\QY项目\910b_results\final"
# Models to summarise; ChatGLM2-6B is only listed for LoRA fine-tuning.
MODEL_PREFIX = (
    ["Qwen-7B", "Llama2-7B", "ChatGLM2-6B", "Baichuan2-7B"]
    if TRAIN_TYPE == "lora_sft"
    else ["Qwen-7B", "Llama2-7B", "Baichuan2-7B"]
)
def get_train_result(step500):
    """Summarise the runtime metrics of every model / device-count configuration.

    For each model in MODEL_PREFIX, first with 8 devices and then with 1, every
    entry of RESULTS_PATH whose name starts with
    ``<TRAIN_TYPE>_<model>_<device count>_gpu_<steps>`` counts as one run. Its
    ``train_results.json`` (``predict_results.json`` when TRAIN_TYPE is not
    "lora_sft") is loaded and the ``*_runtime`` value collected. For "lora_sft"
    the runtime per step and ``train_tokens_per_second`` are collected as well.

    With AVG_ONLY set, each configuration contributes only its average;
    otherwise it contributes every run followed by the average. A configuration
    without any matching run contributes ``nan`` as its average instead of
    raising ZeroDivisionError.

    Args:
        step500: truthy to summarise the 500-step runs, falsy for 1000-step runs.

    Returns:
        A string of three newline-terminated, comma-separated rows: total
        runtime, runtime per step, tokens per second. The last two rows are
        empty unless TRAIN_TYPE is "lora_sft". The string is also printed.
    """

    def _mean(values):
        # NaN marks "no run found" without aborting the whole report.
        return sum(values) / len(values) if values else float("nan")

    is_lora_sft = TRAIN_TYPE == "lora_sft"
    step = 500 if step500 else 1000
    file_prefix = "train" if is_lora_sft else "predict"

    all_total_sec = []
    all_step_sec = []
    all_token_per_sec = []

    for model_prefix in MODEL_PREFIX:
        # single == 0 -> 8-device runs, single == 1 -> single-device runs.
        for single in range(2):
            total_sec = []
            step_sec = []
            token_per_sec = []
            gpu_cnt = 1 if single else 8
            folder_prefix = TRAIN_TYPE + "_" + model_prefix + "_" + str(gpu_cnt) + "_gpu_" + str(step)

            for dir_name in os.listdir(RESULTS_PATH):
                if not dir_name.startswith(folder_prefix):
                    continue
                results_file = os.path.join(RESULTS_PATH, dir_name, file_prefix + "_results.json")
                with open(results_file, 'r', encoding='utf-8') as file:
                    train_results = json.load(file)
                total_sec.append(train_results[file_prefix + '_runtime'])
                if is_lora_sft:
                    step_sec.append(train_results['train_runtime'] / step)
                    token_per_sec.append(train_results['train_tokens_per_second'])

            if AVG_ONLY:
                total_sec = [_mean(total_sec)]
                if is_lora_sft:
                    step_sec = [_mean(step_sec)]
                    token_per_sec = [_mean(token_per_sec)]
            else:
                total_sec.append(_mean(total_sec))
                if is_lora_sft:
                    step_sec.append(_mean(step_sec))
                    token_per_sec.append(_mean(token_per_sec))

            all_total_sec.extend(total_sec)
            all_step_sec.extend(step_sec)
            all_token_per_sec.extend(token_per_sec)

    result = ",".join(map(str, all_total_sec)) + "\n" + ",".join(map(str, all_step_sec)) + "\n" + ",".join(map(str, all_token_per_sec)) + "\n"
    print(result)
    return result
def get_detail_folder_path(model_folder, model_prefix, run_no, single, step500):
    """Return the path of one run's detail folder.

    The folder name is ``<model_prefix>_<TRAIN_TYPE>_<run_no>`` with "_single"
    appended when ``single`` is truthy and "_step500" appended when ``step500``
    is truthy; it lives under ``RESULTS_PATH/TRAIN_TYPE/model_folder``.
    """
    name = "_".join([model_prefix, TRAIN_TYPE, str(run_no)])
    if single:
        name += "_single"
    if step500:
        name += "_step500"
    return os.path.join(RESULTS_PATH, TRAIN_TYPE, model_folder, name)
def get_train_start_end_time(model_folder, model_prefix, run_no, single, step500):
    """Return the first and last ``cur_time`` of a run's ``trainer_log.jsonl``.

    Both values are ``time.struct_time`` objects parsed with the format
    "%Y-%m-%d %H:%M:%S", returned as ``(start_time, end_time)``.
    """
    run_folder = get_detail_folder_path(model_folder, model_prefix, run_no, single, step500)
    trainer_log_path = os.path.join(run_folder, "trainer_log.jsonl")

    first_stamp = get_first_json(trainer_log_path)['cur_time']
    start_time = time.strptime(first_stamp, "%Y-%m-%d %H:%M:%S")

    last_stamp = get_last_json(trainer_log_path)['cur_time']
    end_time = time.strptime(last_stamp, "%Y-%m-%d %H:%M:%S")

    return start_time, end_time
def get_gpu_result(step500):
    """Summarise peak memory and average power draw per configuration.

    Run directories are selected exactly as in the runtime summary: every entry
    of RESULTS_PATH whose name starts with
    ``<TRAIN_TYPE>_<model>_<device count>_gpu_<steps>``, for each model in
    MODEL_PREFIX, first with 8 devices and then with 1. Inside a run directory
    every file whose name starts with "npu_status" is read as JSON lines; each
    line yields one memory sample and one power sample. A run contributes the
    maximum of its memory samples and the mean of its power samples.

    With AVG_ONLY set, each configuration contributes only the average over its
    runs; otherwise every run is listed followed by that average.

    Blank lines in a status file are ignored. A run directory without any
    sample is reported on stderr and left out (previously ``max([])`` raised
    ValueError), and a configuration without usable runs contributes ``nan`` as
    its average instead of raising ZeroDivisionError.

    Args:
        step500: truthy to summarise the 500-step runs, falsy for 1000-step runs.

    Returns:
        Two comma-separated rows joined by a newline: memory, then power.
        The string is also printed.
    """
    import sys  # local import: only needed for the warning below

    def _mean(values):
        # NaN marks "nothing to average" without aborting the whole report.
        return sum(values) / len(values) if values else float("nan")

    step = 500 if step500 else 1000
    all_mem = []
    all_power_consumption = []

    for model_prefix in MODEL_PREFIX:
        # single == 0 -> 8-device runs, single == 1 -> single-device runs.
        for single in range(2):
            all_run_max_mem = []
            all_run_avg_power_consumption = []
            gpu_cnt = 1 if single else 8
            folder_prefix = TRAIN_TYPE + "_" + model_prefix + "_" + str(gpu_cnt) + "_gpu_" + str(step)

            for dir_name in os.listdir(RESULTS_PATH):
                if not dir_name.startswith(folder_prefix):
                    continue
                run_dir = os.path.join(RESULTS_PATH, dir_name)
                max_mems = []
                power_consumptions = []
                for file_name in os.listdir(run_dir):
                    if not file_name.startswith("npu_status"):
                        continue
                    with open(os.path.join(run_dir, file_name), 'r', encoding='utf-8') as file:
                        for line in file:
                            if not line.strip():
                                continue  # tolerate blank / trailing empty lines
                            gpu_results = json.loads(line)
                            max_mems.append(get_max_mem_in_multi_gpu(gpu_results))
                            power_consumptions.append(get_sum_power_consumption(gpu_results, single))

                if not max_mems:
                    print("warning: no npu_status samples in " + run_dir + ", run skipped", file=sys.stderr)
                    continue
                all_run_max_mem.append(max(max_mems))
                all_run_avg_power_consumption.append(sum(power_consumptions) / len(power_consumptions))

            if AVG_ONLY:
                all_run_max_mem = [_mean(all_run_max_mem)]
                all_run_avg_power_consumption = [_mean(all_run_avg_power_consumption)]
            else:
                all_run_max_mem.append(_mean(all_run_max_mem))
                all_run_avg_power_consumption.append(_mean(all_run_avg_power_consumption))

            all_mem.extend(all_run_max_mem)
            all_power_consumption.extend(all_run_avg_power_consumption)

    result = ",".join(map(str, all_mem)) + "\n" + ",".join(map(str, all_power_consumption))
    print(result)
    return result
def get_sum_power_consumption(gpu_results, single, card_no = 0):
    """Return the power reading of one status sample.

    Args:
        gpu_results: one parsed status line; its "npu_power_dissipation" list
            holds one dict per card with a "power_dissipation" value.
        single: truthy for a single-card run, falsy for a multi-card run.
        card_no: index of the card to report when ``single`` is truthy.

    Returns:
        The reading of card ``card_no`` for a single-card run, otherwise the
        sum over every card listed in the sample. The previous loop used
        ``range(7)`` and therefore left out the last card of an 8-card run.
    """
    readings = gpu_results["npu_power_dissipation"]
    if single:
        return readings[card_no]["power_dissipation"]
    # Built-in sum over all reported cards; no local name shadows it any more.
    return sum(card["power_dissipation"] for card in readings)
def get_max_mem_in_multi_gpu(gpu_results, mem_capacity=64):
    """Return the largest "mem_usage_percent" of a status sample, scaled.

    Args:
        gpu_results: one parsed status line; its "device_mem_usage" list holds
            one dict per device with a "mem_usage_percent" value.
        mem_capacity: factor the largest percentage is multiplied by. The
            default 64 is the value that used to be hard-coded; presumably the
            per-device memory size -- TODO confirm the unit and whether the
            percentage is expressed as 0-1 or 0-100.

    Returns:
        ``max_percent * mem_capacity``, where ``max_percent`` never drops below
        0 (also the result for an empty device list).
    """
    max_mem_percent = 0
    for device in gpu_results["device_mem_usage"]:
        max_mem_percent = max(max_mem_percent, device["mem_usage_percent"])
    return max_mem_percent * mem_capacity
def get_first_json(jsonl_file_path):
    """Return the first JSON record of a JSON-lines file.

    Blank lines are skipped. Returns None when the file holds no record
    (previously an empty file or a leading blank line raised JSONDecodeError).

    Args:
        jsonl_file_path: path of a UTF-8 encoded file with one JSON value per line.
    """
    with open(jsonl_file_path, 'r', encoding='utf-8') as file:
        for line in file:
            if line.strip():
                return json.loads(line)
    return None
def get_last_json(jsonl_file_path):
    """Return the last JSON record of a JSON-lines file.

    Blank lines are skipped, so a trailing empty line no longer raises
    JSONDecodeError. Returns None when the file holds no record.

    Args:
        jsonl_file_path: path of a UTF-8 encoded file with one JSON value per line.
    """
    last_line = None
    with open(jsonl_file_path, 'r', encoding='utf-8') as file:
        for line in file:
            if line.strip():
                last_line = line
    # Parse only the final record instead of decoding every line on the way.
    return json.loads(last_line) if last_line is not None else None
def main():
    """Print the runtime summary followed by the device summary.

    Runs with ``step500`` False, i.e. for the 1000-step runs; set the flag
    below to True to summarise the 500-step runs instead.
    """
    use_step500 = False
    get_train_result(use_step500)
    get_gpu_result(use_step500)


if __name__ == "__main__":
    main()

View File

@ -14,6 +14,7 @@
import os
import sys
import time
from typing import TYPE_CHECKING, Dict, Literal, Optional, Sequence, Union
import numpy as np

View File

@ -303,6 +303,7 @@ class LogCallback(TrainerCallback):
learning_rate=state.log_history[-1].get("learning_rate", None),
epoch=state.log_history[-1].get("epoch", None),
percentage=round(self.cur_steps / self.max_steps * 100, 2) if self.max_steps != 0 else 100,
cur_time = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()),
elapsed_time=self.elapsed_time,
remaining_time=self.remaining_time,
)

View File

@ -15,6 +15,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import time
from typing import TYPE_CHECKING, List, Optional
from ...data import SFTDataCollatorWith4DAttentionMask, get_dataset
@ -113,6 +114,12 @@ def run_sft(
# Predict
if training_args.do_predict:
predict_results = trainer.predict(dataset_module["eval_dataset"], metric_key_prefix="predict", **gen_kwargs)
print("********************************************************")
num_train_tokens = trainer.num_tokens(trainer.get_test_dataloader(dataset_module["eval_dataset"]))
print(f"num_train_tokens: {num_train_tokens}")
print("********************************************************")
if training_args.predict_with_generate: # predict_loss will be wrong if predict_with_generate is enabled
predict_results.metrics.pop("predict_loss", None)
trainer.log_metrics("predict", predict_results.metrics)