chore: set ASCEND_RT_VISIBLE_DEVICES

This commit is contained in:
wql 2024-09-19 15:25:12 +08:00
parent 5a306611bc
commit 647525952d
2 changed files with 24 additions and 2 deletions

View File

@@ -4,6 +4,8 @@
 repeat_cnt=$1
+export ASCEND_RT_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
 for ((i=0; i<repeat_cnt; i++)); do
 bash run_once.sh lora_sft Qwen-7B 8 500
 bash run_once.sh lora_sft Qwen-7B 8 1000
@@ -22,4 +24,24 @@ for ((i=0; i<repeat_cnt; i++)); do
 bash run_once.sh inference Baichuan2-7B 8 1000
 bash run_once.sh inference Baichuan2-7B 1 500
 bash run_once.sh inference Baichuan2-7B 1 1000
+bash run_once.sh lora_sft chatglm2-6B 8 500
+bash run_once.sh lora_sft chatglm2-6B 8 1000
+bash run_once.sh lora_sft chatglm2-6B 1 500
+bash run_once.sh lora_sft chatglm2-6B 1 1000
+bash run_once.sh lora_sft llama-2-7b-ms 8 500
+bash run_once.sh lora_sft llama-2-7b-ms 8 1000
+bash run_once.sh lora_sft llama-2-7b-ms 1 500
+bash run_once.sh lora_sft llama-2-7b-ms 1 1000
+bash run_once.sh inference chatglm2-6B 8 500
+bash run_once.sh inference chatglm2-6B 8 1000
+bash run_once.sh inference chatglm2-6B 1 500
+bash run_once.sh inference chatglm2-6B 1 1000
+bash run_once.sh inference llama-2-7b-ms 8 500
+bash run_once.sh inference llama-2-7b-ms 8 1000
+bash run_once.sh inference llama-2-7b-ms 1 500
+bash run_once.sh inference llama-2-7b-ms 1 1000
 done

View File

@@ -40,11 +40,11 @@ npu_status_pid=$!
 if [ "${gpu_cnt}"="1" ]; then
 ASCEND_RT_VISIBLE_DEVICES=0 llamafactory-cli train ${output_dir}/${run_name}.yaml | tee "${output_dir}/log.txt" &
 train_pid=$!
-echo "Start train"
+echo "Start single npu train"
 else
 FORCE_TORCHRUN=1 llamafactory-cli train ${output_dir}/${run_name}.yaml | tee "${output_dir}/log.txt" &
 train_pid=$!
-echo "Start train"
+echo "Start multi npu train"
 fi
 wait $train_pid