fix: small change

This commit is contained in:
wql 2024-09-05 13:37:17 +08:00
parent 64044380bd
commit 190fddf27d
3 changed files with 16 additions and 12 deletions

View File

@@ -6,7 +6,7 @@ import psutil
UNIT = 1024 * 1024 * 1024 UNIT = 1024 * 1024 * 1024
def gpu_status(output_path = "./results/gpu_status", print_status = False): def gpu_status(output_path = "./results/gpu_status", print_status = False, sleep_time = 60):
pynvml.nvmlInit() pynvml.nvmlInit()
gpuDeviceCount = pynvml.nvmlDeviceGetCount() gpuDeviceCount = pynvml.nvmlDeviceGetCount()
start_time = time.time() start_time = time.time()
@@ -55,14 +55,15 @@ def gpu_status(output_path = "./results/gpu_status", print_status = False):
if print_status: if print_status:
print(logs) print(logs)
time.sleep(60) time.sleep(sleep_time)
pynvml.nvmlShutdown() pynvml.nvmlShutdown()
def main(): def main():
output_path = sys.argv[1] output_path = sys.argv[1]
print_status = sys.argv[2] print_status = sys.argv[2]
gpu_status(output_path, print_status) sleep_time = sys.argv[3]
gpu_status(output_path, print_status, sleep_time)
if __name__ == "__main__": if __name__ == "__main__":
main() main()

View File

@@ -47,6 +47,8 @@ def main():
with open(f'{output_dir}/{run_name}.yaml', 'w', encoding='utf-8') as f: with open(f'{output_dir}/{run_name}.yaml', 'w', encoding='utf-8') as f:
yaml.dump(data=config, stream=f, allow_unicode=True) yaml.dump(data=config, stream=f, allow_unicode=True)
print(f"yaml file saved to {output_dir}/{run_name}.yaml")
if __name__ == "__main__": if __name__ == "__main__":
main() main()

View File

@@ -26,16 +26,16 @@ fi
# echo "${run_type} ${model} ${gpu_cnt} ${max_steps} ${run_name} ${output_dir}" # echo "${run_type} ${model} ${gpu_cnt} ${max_steps} ${run_name} ${output_dir}"
python prepare_yaml_file.py ${run_type} ${model} ${max_steps} ${run_name} ${output_dir} python prepare_yaml_file.py ${run_type} ${model} ${max_steps} ${run_name} ${output_dir}
echo "yaml file save to ${output_dir}/${run_name}.yaml"
export USE_MODELSCOPE_HUB=1
# export USE_MODELSCOPE_HUB=1 echo "Start recording gpu status "
# 0 means not printing gpu status
# # 0 means not printing gpu status python gpu_status.py ${output_dir} 1 10 &
# python gpu_status.py ${output_dir} 0 & gpu_status_pid=$!
# gpu_status_pid=$! echo "${gpu_status_pid}"
# echo "Start recording gpu status "
sleep 60
# if [ "${gpu_cnt}"="1" ]; then # if [ "${gpu_cnt}"="1" ]; then
# ASCEND_RT_VISIBLE_DEVICES=0 llamafactory-cli train ${output_dir}/${run_name}.yml \ # ASCEND_RT_VISIBLE_DEVICES=0 llamafactory-cli train ${output_dir}/${run_name}.yml \
@@ -52,5 +52,6 @@ echo "yaml file save to ${output_dir}/${run_name}.yaml"
# wait $train_pid # wait $train_pid
# echo "Train ended" # echo "Train ended"
# sleep 90 # sleep 90
# kill $gpu_status_pid
# echo "Gpu status ended" kill $gpu_status_pid
echo "Gpu status ended"