Patch for run_once.sh: comments out the NPU status recorder and the
llamafactory-cli training launch, then uploads ${output_dir} to OBS and
removes the local copy.

Review notes:
  * Line structure was restored from a single-line paste; indentation inside
    the old if/else body is reconstructed as 4 spaces. Apply with
    `git apply --ignore-whitespace` or `patch -l` if the target file differs.
  * The upload/cleanup lines were hardened (see comments in the hunk), so the
    post-image hash on the "index" line no longer matches the result.

diff --git a/run_once.sh b/run_once.sh
index d41a3ba9..493c0f60 100644
--- a/run_once.sh
+++ b/run_once.sh
@@ -37,24 +37,30 @@ python prepare_yaml_file.py ${run_type} ${model} ${max_steps} ${run_name} ${outp
 
 export USE_MODELSCOPE_HUB=1
 
-echo "Start recording npu status "
-bash npu_status.sh ${output_dir} 60 0 &
-npu_status_pid=$!
-# echo "${npu_status_pid}"
+# echo "Start recording npu status "
+# bash npu_status.sh ${output_dir} 60 0 &
+# npu_status_pid=$!
+# # echo "${npu_status_pid}"
 
-if [ "${gpu_cnt}" = "1" ]; then
-    ASCEND_RT_VISIBLE_DEVICES=0 llamafactory-cli train ${output_dir}/${run_name}.yaml | tee "${output_dir}/log.txt" &
-    train_pid=$!
-    echo "Start single npu train"
-else
-    FORCE_TORCHRUN=1 llamafactory-cli train ${output_dir}/${run_name}.yaml | tee "${output_dir}/log.txt" &
-    train_pid=$!
-    echo "Start multi npu train"
-fi
+# if [ "${gpu_cnt}" = "1" ]; then
+#     ASCEND_RT_VISIBLE_DEVICES=0 llamafactory-cli train ${output_dir}/${run_name}.yaml | tee "${output_dir}/log.txt" &
+#     train_pid=$!
+#     echo "Start single npu train"
+# else
+#     FORCE_TORCHRUN=1 llamafactory-cli train ${output_dir}/${run_name}.yaml | tee "${output_dir}/log.txt" &
+#     train_pid=$!
+#     echo "Start multi npu train"
+# fi
 
-wait $train_pid
-echo "Train ended"
+# wait $train_pid
+# echo "Train ended"
 
-sleep 60
-kill $npu_status_pid
-echo "Npu status ended"
\ No newline at end of file
+# sleep 60
+# kill $npu_status_pid
+# echo "Npu status ended"
+
+# Upload the results to OBS. Paths are passed as argv so quotes in them cannot
+# break the Python snippet; the local copy is removed only if python3 exits 0.
+python3 -c "import sys, moxing as mox; mox.file.copy_parallel(sys.argv[1], sys.argv[2])" \
+    "${output_dir}" "obs://xty/results/${run_name}" \
+    && rm -r -- "${output_dir:?}"
\ No newline at end of file