fix: add configurable sleep_time to gpu_status and enable GPU status recording in run_once.sh
This commit is contained in:
parent
64044380bd
commit
190fddf27d
|
@ -6,7 +6,7 @@ import psutil
|
|||
|
||||
UNIT = 1024 * 1024 * 1024
|
||||
|
||||
def gpu_status(output_path = "./results/gpu_status", print_status = False):
|
||||
def gpu_status(output_path = "./results/gpu_status", print_status = False, sleep_time = 60):
|
||||
pynvml.nvmlInit()
|
||||
gpuDeviceCount = pynvml.nvmlDeviceGetCount()
|
||||
start_time = time.time()
|
||||
|
@ -55,14 +55,15 @@ def gpu_status(output_path = "./results/gpu_status", print_status = False):
|
|||
if print_status:
|
||||
print(logs)
|
||||
|
||||
time.sleep(60)
|
||||
time.sleep(sleep_time)
|
||||
|
||||
pynvml.nvmlShutdown()
|
||||
|
||||
def main():
|
||||
output_path = sys.argv[1]
|
||||
print_status = sys.argv[2]
|
||||
gpu_status(output_path, print_status)
|
||||
sleep_time = sys.argv[3]
|
||||
gpu_status(output_path, print_status, sleep_time)
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
|
|
|
@ -47,6 +47,8 @@ def main():
|
|||
|
||||
with open(f'{output_dir}/{run_name}.yaml', 'w', encoding='utf-8') as f:
|
||||
yaml.dump(data=config, stream=f, allow_unicode=True)
|
||||
|
||||
print(f"yaml file saved to {output_dir}/{run_name}.yaml")
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
19
run_once.sh
19
run_once.sh
|
@ -26,16 +26,16 @@ fi
|
|||
|
||||
# echo "${run_type} ${model} ${gpu_cnt} ${max_steps} ${run_name} ${output_dir}"
|
||||
python prepare_yaml_file.py ${run_type} ${model} ${max_steps} ${run_name} ${output_dir}
|
||||
echo "yaml file save to ${output_dir}/${run_name}.yaml"
|
||||
|
||||
export USE_MODELSCOPE_HUB=1
|
||||
|
||||
# export USE_MODELSCOPE_HUB=1
|
||||
|
||||
# # 0 means not printing gpu status
|
||||
# python gpu_status.py ${output_dir} 0 &
|
||||
# gpu_status_pid=$!
|
||||
# echo "Start recording gpu status "
|
||||
echo "Start recording gpu status "
|
||||
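# Arguments: output dir, print_status (0 is meant to disable printing gpu status; 1 is passed here), sleep_time between samples in seconds (10 here)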
|
||||
python gpu_status.py ${output_dir} 1 10 &
|
||||
gpu_status_pid=$!
|
||||
echo "${gpu_status_pid}"
|
||||
|
||||
sleep 60
|
||||
|
||||
# if [ "${gpu_cnt}"="1" ]; then
|
||||
# ASCEND_RT_VISIBLE_DEVICES=0 llamafactory-cli train ${output_dir}/${run_name}.yml \
|
||||
|
@ -52,5 +52,6 @@ echo "yaml file save to ${output_dir}/${run_name}.yaml"
|
|||
# wait $train_pid
|
||||
# echo "Train ended"
|
||||
# sleep 90
|
||||
# kill $gpu_status_pid
|
||||
# echo "Gpu status ended"
|
||||
|
||||
kill $gpu_status_pid
|
||||
echo "Gpu status ended"
|
Loading…
Reference in New Issue