From c4fcb9689b31aec1bffd5880015179d92eec7bf2 Mon Sep 17 00:00:00 2001 From: wql Date: Wed, 11 Sep 2024 09:54:50 +0800 Subject: [PATCH] chore: prepare for test --- npu_status.sh | 8 ++++---- run_once.sh | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/npu_status.sh b/npu_status.sh index ce84a03d..865a1490 100644 --- a/npu_status.sh +++ b/npu_status.sh @@ -7,9 +7,9 @@ print_to_screen=$3 output_file_path="${output_dir}/gpu_status_$(date +"%Y%m%d%H%M%S").json" device_cnt=$(npu-smi info | grep '310P3' | wc -l) -chip_id_arr_from_info=(0 1 0 1) -npu_id_arr_from_info=(5 5 6 6) -npu_id_arr=(5 6) +chip_id_arr_from_info=(0 1 0 1 0 1 0 1) +npu_id_arr_from_info=(2 2 3 3 5 5 6 6) +npu_id_arr=(2 3 5 6) loop_cnt=0 @@ -48,7 +48,7 @@ done get_power_dissipation() { local npu_id="$1" # need to varify - power_dissipation=$(npu-smi info -t power -i ${npu_id} | grep 'Power Dissipation(W)' | awk '{print $2}') + power_dissipation=$(npu-smi info -t power -i ${npu_id} | grep 'Power Dissipation(W)' | awk '{print $4}') echo "${power_dissipation}" } diff --git a/run_once.sh b/run_once.sh index f15ed891..26817027 100644 --- a/run_once.sh +++ b/run_once.sh @@ -29,7 +29,7 @@ python prepare_yaml_file.py ${run_type} ${model} ${max_steps} ${run_name} ${outp export USE_MODELSCOPE_HUB=1 echo "Start recording npu status " -bash npu_status.sh ${output_dir} 10 1 & +bash npu_status.sh ${output_dir} 60 0 & npu_status_pid=$! echo "${npu_status_pid}" @@ -46,6 +46,6 @@ fi wait $train_pid echo "Train ended" -sleep 90 +sleep 60 kill $npu_status_pid echo "Npu status ended" \ No newline at end of file