#!/bin/bash
#
# LLaMA-Factory-310P3/npu_status.sh
#
# Captures the command-line arguments and the NPU topology tables used by the
# rest of this script.
#
# Usage: npu_status.sh <output_dir> <sleep_time> [print_to_screen]
#   output_dir       directory that receives the gpu_status_<timestamp>.json log
#   sleep_time       delay between samples, passed verbatim to sleep(1)
#   print_to_screen  pass 1 to also echo every sample to stdout (default: 0)

# Fail fast: without a sleep interval the sampling loop would spin without
# pausing, and without an output directory the log path is meaningless.
if (( $# < 2 )); then
  printf 'Usage: %s <output_dir> <sleep_time> [print_to_screen]\n' "${0##*/}" >&2
  exit 2
fi

output_dir=$1
sleep_time=$2
print_to_screen=${3:-0}

# Make sure the log directory exists before anything tries to append to it.
mkdir -p -- "${output_dir}" || exit 1
output_file_path="${output_dir}/gpu_status_$(date +"%Y%m%d%H%M%S").json"

# Number of `npu-smi info` lines that mention a 310P3.
# NOTE(review): presumably one such line per device — confirm against the
# actual npu-smi output on the target host.
device_cnt=$(npu-smi info | grep -c '310P3')

# Hard-coded topology, indexed by device id (0..7): the chip id and the NPU
# (card) id each device belongs to, plus the distinct NPU ids.
# NOTE(review): these tables must match the host's `npu-smi info` listing;
# they are not derived from it.
chip_id_arr_from_info=(0 1 0 1 0 1 0 1)
npu_id_arr_from_info=(2 2 3 3 5 5 6 6)
npu_id_arr=(2 3 5 6)
# ---- npu-smi query helpers ----
#######################################
# Print the power dissipation reported by npu-smi for one NPU.
# Arguments:
#   $1 - NPU id, passed to `npu-smi info -t power -i`
# Outputs:
#   Writes the 4th whitespace-separated field of every output line that
#   contains "Power Dissipation(W)" to stdout (nothing if no line matches).
#######################################
get_power_dissipation() {
  local npu_id="$1"
  # TODO: verify that field 4 is the wattage on the deployed npu-smi version.
  # The result goes straight to stdout instead of through a global variable,
  # so the caller's own `power_dissipation` variable is never clobbered.
  npu-smi info -t power -i "${npu_id}" \
    | awk 'index($0, "Power Dissipation(W)") { print $4 }'
}
#######################################
# Print the memory usage that `npu-smi info` reports for one chip/device row.
# Arguments:
#   $1 - chip id
#   $2 - device id
# Outputs:
#   Writes field 8 of the first matching row to stdout (nothing if no row
#   matches).
#######################################
get_mem_usage() {
  local chip="$1"
  local device="$2"
  # The ids are handed to awk with -v; the previous single-quoted grep pattern
  # never expanded them and therefore never matched anything.
  # NOTE(review): assumes data rows look like "| <chip> <device> | ... ", i.e.
  # chip and device are awk fields 2 and 3 regardless of column padding, and
  # the used-memory figure is field 8 — confirm against real npu-smi output.
  # Only the first match is printed so the result stays a single value; the
  # `found` flag (instead of `exit`) lets npu-smi finish writing its output.
  npu-smi info \
    | awk -v chip="${chip}" -v device="${device}" \
        '!found && $2 == chip && $3 == device { print $8; found = 1 }'
}
# ---- sampling loop ----
# Sampling loop. Each iteration:
#   1. queries the memory usage of every device and the power draw of every NPU,
#   2. appends one JSON object (one line) to ${output_file_path},
#   3. echoes the same line to stdout when print_to_screen is 1,
#   4. sleeps ${sleep_time} before the next sample.
# The script exits with status 0 after max_loops samples.

# Print "$1" when it is a plain decimal number, otherwise the JSON literal
# null, so that an empty or non-numeric reading (e.g. "NA") cannot corrupt the
# emitted JSON.
_json_number_or_null() {
  if [[ "$1" =~ ^-?[0-9]+([.][0-9]+)?$ ]]; then
    printf '%s' "$1"
  else
    printf 'null'
  fi
}

max_loops=1500
loop_cnt=0
while true; do
  # Per-device memory usage entries, separated by ", ".
  device_mem_usage=""
  for ((i = 0; i < device_cnt; i++)); do
    # Command substitution actually runs the helper; a plain quoted string
    # would only store the command text.
    mem_usage=$(_json_number_or_null "$(get_mem_usage "${chip_id_arr_from_info[i]}" "${i}")")
    device_mem_usage+="{\"npu_id\": ${npu_id_arr_from_info[i]:-null}, \"chip_id\": ${chip_id_arr_from_info[i]:-null}, \"device_id\": ${i}, \"mem_usage\": ${mem_usage}}, "
  done
  device_mem_usage="${device_mem_usage%, }"  # drop the trailing separator

  # Per-NPU power dissipation entries, separated by ", ".
  npu_power_dissipation=""
  for npu_id in "${npu_id_arr[@]}"; do
    power_dissipation=$(_json_number_or_null "$(get_power_dissipation "${npu_id}")")
    npu_power_dissipation+="{\"npu_id\": ${npu_id}, \"power_dissipation\": ${power_dissipation}}, "
  done
  npu_power_dissipation="${npu_power_dissipation%, }"

  cur_time=$(date +"%Y-%m-%d %H:%M:%S")
  json="{\"cur_time\": \"${cur_time}\", \"npu_power_dissipation\": [${npu_power_dissipation}], \"device_mem_usage\": [${device_mem_usage}]}"

  # Append to the log file named by the variable, not to a file literally
  # called "output_file_path".
  printf '%s\n' "${json}" >> "${output_file_path}"

  # Echo before sleeping so the on-screen sample is not delayed by a full
  # interval. String comparison avoids "integer expression expected" when the
  # flag is empty.
  if [[ "${print_to_screen:-0}" == "1" ]]; then
    printf '%s\n' "${json}"
  fi

  # Arithmetic increment: `loop_cnt+=1` concatenates strings ("01", "011", ...)
  # and would never reach the limit.
  loop_cnt=$((loop_cnt + 1))
  if ((loop_cnt >= max_loops)); then
    exit 0
  fi

  sleep "${sleep_time}"
done