fix: small change
This commit is contained in:
parent
64044380bd
commit
190fddf27d
|
@ -6,7 +6,7 @@ import psutil
|
||||||
|
|
||||||
UNIT = 1024 * 1024 * 1024
|
UNIT = 1024 * 1024 * 1024
|
||||||
|
|
||||||
def gpu_status(output_path = "./results/gpu_status", print_status = False):
|
def gpu_status(output_path = "./results/gpu_status", print_status = False, sleep_time = 60):
|
||||||
pynvml.nvmlInit()
|
pynvml.nvmlInit()
|
||||||
gpuDeviceCount = pynvml.nvmlDeviceGetCount()
|
gpuDeviceCount = pynvml.nvmlDeviceGetCount()
|
||||||
start_time = time.time()
|
start_time = time.time()
|
||||||
|
@ -55,14 +55,15 @@ def gpu_status(output_path = "./results/gpu_status", print_status = False):
|
||||||
if print_status:
|
if print_status:
|
||||||
print(logs)
|
print(logs)
|
||||||
|
|
||||||
time.sleep(60)
|
time.sleep(sleep_time)
|
||||||
|
|
||||||
pynvml.nvmlShutdown()
|
pynvml.nvmlShutdown()
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
output_path = sys.argv[1]
|
output_path = sys.argv[1]
|
||||||
print_status = sys.argv[2]
|
print_status = sys.argv[2]
|
||||||
gpu_status(output_path, print_status)
|
sleep_time = sys.argv[3]
|
||||||
|
gpu_status(output_path, print_status, sleep_time)
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
main()
|
main()
|
||||||
|
|
|
@ -47,6 +47,8 @@ def main():
|
||||||
|
|
||||||
with open(f'{output_dir}/{run_name}.yaml', 'w', encoding='utf-8') as f:
|
with open(f'{output_dir}/{run_name}.yaml', 'w', encoding='utf-8') as f:
|
||||||
yaml.dump(data=config, stream=f, allow_unicode=True)
|
yaml.dump(data=config, stream=f, allow_unicode=True)
|
||||||
|
|
||||||
|
print(f"yaml file saved to {output_dir}/{run_name}.yaml")
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
main()
|
main()
|
19
run_once.sh
19
run_once.sh
|
@ -26,16 +26,16 @@ fi
|
||||||
|
|
||||||
# echo "${run_type} ${model} ${gpu_cnt} ${max_steps} ${run_name} ${output_dir}"
|
# echo "${run_type} ${model} ${gpu_cnt} ${max_steps} ${run_name} ${output_dir}"
|
||||||
python prepare_yaml_file.py ${run_type} ${model} ${max_steps} ${run_name} ${output_dir}
|
python prepare_yaml_file.py ${run_type} ${model} ${max_steps} ${run_name} ${output_dir}
|
||||||
echo "yaml file save to ${output_dir}/${run_name}.yaml"
|
|
||||||
|
|
||||||
|
export USE_MODELSCOPE_HUB=1
|
||||||
|
|
||||||
# export USE_MODELSCOPE_HUB=1
|
echo "Start recording gpu status "
|
||||||
|
# 0 means not printing gpu status
|
||||||
# # 0 means not printing gpu status
|
python gpu_status.py ${output_dir} 1 10 &
|
||||||
# python gpu_status.py ${output_dir} 0 &
|
gpu_status_pid=$!
|
||||||
# gpu_status_pid=$!
|
echo "${gpu_status_pid}"
|
||||||
# echo "Start recording gpu status "
|
|
||||||
|
|
||||||
|
sleep 60
|
||||||
|
|
||||||
# if [ "${gpu_cnt}"="1" ]; then
|
# if [ "${gpu_cnt}"="1" ]; then
|
||||||
# ASCEND_RT_VISIBLE_DEVICES=0 llamafactory-cli train ${output_dir}/${run_name}.yml \
|
# ASCEND_RT_VISIBLE_DEVICES=0 llamafactory-cli train ${output_dir}/${run_name}.yml \
|
||||||
|
@ -52,5 +52,6 @@ echo "yaml file save to ${output_dir}/${run_name}.yaml"
|
||||||
# wait $train_pid
|
# wait $train_pid
|
||||||
# echo "Train ended"
|
# echo "Train ended"
|
||||||
# sleep 90
|
# sleep 90
|
||||||
# kill $gpu_status_pid
|
|
||||||
# echo "Gpu status ended"
|
kill $gpu_status_pid
|
||||||
|
echo "Gpu status ended"
|
Loading…
Reference in New Issue