chore: set ASCEND_RT_VISIBLE_DEVICES

This commit is contained in:
wql 2024-09-19 15:25:12 +08:00
parent 5a306611bc
commit 647525952d
2 changed files with 24 additions and 2 deletions

View File

@@ -4,6 +4,8 @@
 repeat_cnt=$1
+export ASCEND_RT_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
 for ((i=0; i<repeat_cnt; i++)); do
 bash run_once.sh lora_sft Qwen-7B 8 500
 bash run_once.sh lora_sft Qwen-7B 8 1000
@@ -22,4 +24,24 @@ for ((i=0; i<repeat_cnt; i++)); do
 bash run_once.sh inference Baichuan2-7B 8 1000
 bash run_once.sh inference Baichuan2-7B 1 500
 bash run_once.sh inference Baichuan2-7B 1 1000
+bash run_once.sh lora_sft chatglm2-6B 8 500
+bash run_once.sh lora_sft chatglm2-6B 8 1000
+bash run_once.sh lora_sft chatglm2-6B 1 500
+bash run_once.sh lora_sft chatglm2-6B 1 1000
+bash run_once.sh lora_sft llama-2-7b-ms 8 500
+bash run_once.sh lora_sft llama-2-7b-ms 8 1000
+bash run_once.sh lora_sft llama-2-7b-ms 1 500
+bash run_once.sh lora_sft llama-2-7b-ms 1 1000
+bash run_once.sh inference chatglm2-6B 8 500
+bash run_once.sh inference chatglm2-6B 8 1000
+bash run_once.sh inference chatglm2-6B 1 500
+bash run_once.sh inference chatglm2-6B 1 1000
+bash run_once.sh inference llama-2-7b-ms 8 500
+bash run_once.sh inference llama-2-7b-ms 8 1000
+bash run_once.sh inference llama-2-7b-ms 1 500
+bash run_once.sh inference llama-2-7b-ms 1 1000
 done

View File

@@ -40,11 +40,11 @@ npu_status_pid=$!
 if [ "${gpu_cnt}"="1" ]; then
 ASCEND_RT_VISIBLE_DEVICES=0 llamafactory-cli train ${output_dir}/${run_name}.yaml | tee "${output_dir}/log.txt" &
 train_pid=$!
-echo "Start train"
+echo "Start single npu train"
 else
 FORCE_TORCHRUN=1 llamafactory-cli train ${output_dir}/${run_name}.yaml | tee "${output_dir}/log.txt" &
 train_pid=$!
-echo "Start train"
+echo "Start multi npu train"
 fi
 wait $train_pid