diff --git a/batch_run.sh b/batch_run.sh
index 3bcd704f..bf2af816 100644
--- a/batch_run.sh
+++ b/batch_run.sh
@@ -1,56 +1,58 @@
-FORCE_TORCHRUN=1 llamafactory-cli train results/lora_sft_2/Llama2-7B/llama2_lora_sft_1.yaml | tee results/lora_sft_2/Llama2-7B/llama2_lora_sft_1_log.txt
-FORCE_TORCHRUN=1 llamafactory-cli train results/lora_sft_2/Llama2-7B/llama2_lora_sft_2.yaml | tee results/lora_sft_2/Llama2-7B/llama2_lora_sft_2_log.txt
-FORCE_TORCHRUN=1 llamafactory-cli train results/lora_sft_2/Llama2-7B/llama2_lora_sft_3.yaml | tee results/lora_sft_2/Llama2-7B/llama2_lora_sft_3_log.txt
-CUDA_VISIBLE_DEVICES=0 llamafactory-cli train results/lora_sft_2/Llama2-7B/llama2_lora_sft_1_single.yaml | tee results/lora_sft_2/Llama2-7B/llama2_lora_sft_1_single_log.txt
-CUDA_VISIBLE_DEVICES=0 llamafactory-cli train results/lora_sft_2/Llama2-7B/llama2_lora_sft_2_single.yaml | tee results/lora_sft_2/Llama2-7B/llama2_lora_sft_2_single_log.txt
-CUDA_VISIBLE_DEVICES=0 llamafactory-cli train results/lora_sft_2/Llama2-7B/llama2_lora_sft_3_single.yaml | tee results/lora_sft_2/Llama2-7B/llama2_lora_sft_3_single_log.txt
+FORCE_TORCHRUN=1 llamafactory-cli train results/lora_sft_2/Llama2-7B/llama2_lora_sft_1.yaml | tee results/lora_sft_2/Llama2-7B/llama2_lora_sft_1_step500_log.txt
+FORCE_TORCHRUN=1 llamafactory-cli train results/lora_sft_2/Llama2-7B/llama2_lora_sft_2.yaml | tee results/lora_sft_2/Llama2-7B/llama2_lora_sft_2_step500_log.txt
+FORCE_TORCHRUN=1 llamafactory-cli train results/lora_sft_2/Llama2-7B/llama2_lora_sft_3.yaml | tee results/lora_sft_2/Llama2-7B/llama2_lora_sft_3_step500_log.txt
+CUDA_VISIBLE_DEVICES=0 llamafactory-cli train results/lora_sft_2/Llama2-7B/llama2_lora_sft_1_single.yaml | tee results/lora_sft_2/Llama2-7B/llama2_lora_sft_1_single_step500_log.txt
+CUDA_VISIBLE_DEVICES=0 llamafactory-cli train results/lora_sft_2/Llama2-7B/llama2_lora_sft_2_single.yaml | tee results/lora_sft_2/Llama2-7B/llama2_lora_sft_2_single_step500_log.txt
+CUDA_VISIBLE_DEVICES=0 llamafactory-cli train results/lora_sft_2/Llama2-7B/llama2_lora_sft_3_single.yaml | tee results/lora_sft_2/Llama2-7B/llama2_lora_sft_3_single_step500_log.txt

-# FORCE_TORCHRUN=1 llamafactory-cli train results/inference/Llama2-7B/llama2_predict_1.yaml | tee results/inference/Llama2-7B/llama2_predict_1_log.txt
-# FORCE_TORCHRUN=1 llamafactory-cli train results/inference/Llama2-7B/llama2_predict_2.yaml | tee results/inference/Llama2-7B/llama2_predict_2_log.txt
-# FORCE_TORCHRUN=1 llamafactory-cli train results/inference/Llama2-7B/llama2_predict_3.yaml | tee results/inference/Llama2-7B/llama2_predict_3_log.txt
-# CUDA_VISIBLE_DEVICES=0 llamafactory-cli train results/inference/Llama2-7B/llama2_predict_1_single.yaml | tee results/inference/Llama2-7B/llama2_predict_1_single_log.txt
-# CUDA_VISIBLE_DEVICES=0 llamafactory-cli train results/inference/Llama2-7B/llama2_predict_2_single.yaml | tee results/inference/Llama2-7B/llama2_predict_2_single_log.txt
-# CUDA_VISIBLE_DEVICES=0 llamafactory-cli train results/inference/Llama2-7B/llama2_predict_3_single.yaml | tee results/inference/Llama2-7B/llama2_predict_3_single_log.txt
+FORCE_TORCHRUN=1 llamafactory-cli train results/lora_sft_2/Baichuan2-7B/Baichuan2_lora_sft_1.yaml | tee results/lora_sft_2/Baichuan2-7B/Baichuan2_lora_sft_1_step500_log.txt
+FORCE_TORCHRUN=1 llamafactory-cli train results/lora_sft_2/Baichuan2-7B/Baichuan2_lora_sft_2.yaml | tee results/lora_sft_2/Baichuan2-7B/Baichuan2_lora_sft_2_step500_log.txt
+FORCE_TORCHRUN=1 llamafactory-cli train results/lora_sft_2/Baichuan2-7B/Baichuan2_lora_sft_3.yaml | tee results/lora_sft_2/Baichuan2-7B/Baichuan2_lora_sft_3_step500_log.txt
+CUDA_VISIBLE_DEVICES=0 llamafactory-cli train results/lora_sft_2/Baichuan2-7B/Baichuan2_lora_sft_1_single.yaml | tee results/lora_sft_2/Baichuan2-7B/Baichuan2_lora_sft_1_single_step500_log.txt
+CUDA_VISIBLE_DEVICES=0 llamafactory-cli train results/lora_sft_2/Baichuan2-7B/Baichuan2_lora_sft_2_single.yaml | tee results/lora_sft_2/Baichuan2-7B/Baichuan2_lora_sft_2_single_step500_log.txt
+CUDA_VISIBLE_DEVICES=0 llamafactory-cli train results/lora_sft_2/Baichuan2-7B/Baichuan2_lora_sft_3_single.yaml | tee results/lora_sft_2/Baichuan2-7B/Baichuan2_lora_sft_3_single_step500_log.txt

-FORCE_TORCHRUN=1 llamafactory-cli train results/lora_sft_2/Baichuan2-7B/Baichuan2_lora_sft_1.yaml | tee results/lora_sft_2/Baichuan2-7B/Baichuan2_lora_sft_1_log.txt
-FORCE_TORCHRUN=1 llamafactory-cli train results/lora_sft_2/Baichuan2-7B/Baichuan2_lora_sft_2.yaml | tee results/lora_sft_2/Baichuan2-7B/Baichuan2_lora_sft_2_log.txt
-FORCE_TORCHRUN=1 llamafactory-cli train results/lora_sft_2/Baichuan2-7B/Baichuan2_lora_sft_3.yaml | tee results/lora_sft_2/Baichuan2-7B/Baichuan2_lora_sft_3_log.txt
-CUDA_VISIBLE_DEVICES=0 llamafactory-cli train results/lora_sft_2/Baichuan2-7B/Baichuan2_lora_sft_1_single.yaml | tee results/lora_sft_2/Baichuan2-7B/Baichuan2_lora_sft_1_single_log.txt
-CUDA_VISIBLE_DEVICES=0 llamafactory-cli train results/lora_sft_2/Baichuan2-7B/Baichuan2_lora_sft_2_single.yaml | tee results/lora_sft_2/Baichuan2-7B/Baichuan2_lora_sft_2_single_log.txt
-CUDA_VISIBLE_DEVICES=0 llamafactory-cli train results/lora_sft_2/Baichuan2-7B/Baichuan2_lora_sft_3_single.yaml | tee results/lora_sft_2/Baichuan2-7B/Baichuan2_lora_sft_3_single_log.txt
+FORCE_TORCHRUN=1 llamafactory-cli train results/lora_sft_2/ChatGLM2-6B/ChatGLM2_lora_sft_1.yaml | tee results/lora_sft_2/ChatGLM2-6B/ChatGLM2_lora_sft_1_step500_log.txt
+FORCE_TORCHRUN=1 llamafactory-cli train results/lora_sft_2/ChatGLM2-6B/ChatGLM2_lora_sft_2.yaml | tee results/lora_sft_2/ChatGLM2-6B/ChatGLM2_lora_sft_2_step500_log.txt
+FORCE_TORCHRUN=1 llamafactory-cli train results/lora_sft_2/ChatGLM2-6B/ChatGLM2_lora_sft_3.yaml | tee results/lora_sft_2/ChatGLM2-6B/ChatGLM2_lora_sft_3_step500_log.txt
+CUDA_VISIBLE_DEVICES=0 llamafactory-cli train results/lora_sft_2/ChatGLM2-6B/ChatGLM2_lora_sft_1_single.yaml | tee results/lora_sft_2/ChatGLM2-6B/ChatGLM2_lora_sft_1_single_step500_log.txt
+CUDA_VISIBLE_DEVICES=0 llamafactory-cli train results/lora_sft_2/ChatGLM2-6B/ChatGLM2_lora_sft_2_single.yaml | tee results/lora_sft_2/ChatGLM2-6B/ChatGLM2_lora_sft_2_single_step500_log.txt
+CUDA_VISIBLE_DEVICES=0 llamafactory-cli train results/lora_sft_2/ChatGLM2-6B/ChatGLM2_lora_sft_3_single.yaml | tee results/lora_sft_2/ChatGLM2-6B/ChatGLM2_lora_sft_3_single_step500_log.txt

-FORCE_TORCHRUN=1 llamafactory-cli train results/lora_sft_2/ChatGLM2-6B/ChatGLM2_lora_sft_1.yaml | tee results/lora_sft_2/ChatGLM2-6B/ChatGLM2_lora_sft_1_log.txt
-FORCE_TORCHRUN=1 llamafactory-cli train results/lora_sft_2/ChatGLM2-6B/ChatGLM2_lora_sft_2.yaml | tee results/lora_sft_2/ChatGLM2-6B/ChatGLM2_lora_sft_2_log.txt
-FORCE_TORCHRUN=1 llamafactory-cli train results/lora_sft_2/ChatGLM2-6B/ChatGLM2_lora_sft_3.yaml | tee results/lora_sft_2/ChatGLM2-6B/ChatGLM2_lora_sft_3_log.txt
-CUDA_VISIBLE_DEVICES=0 llamafactory-cli train results/lora_sft_2/ChatGLM2-6B/ChatGLM2_lora_sft_1_single.yaml | tee results/lora_sft_2/ChatGLM2-6B/ChatGLM2_lora_sft_1_single_log.txt
-CUDA_VISIBLE_DEVICES=0 llamafactory-cli train results/lora_sft_2/ChatGLM2-6B/ChatGLM2_lora_sft_2_single.yaml | tee results/lora_sft_2/ChatGLM2-6B/ChatGLM2_lora_sft_2_single_log.txt
-CUDA_VISIBLE_DEVICES=0 llamafactory-cli train results/lora_sft_2/ChatGLM2-6B/ChatGLM2_lora_sft_3_single.yaml | tee results/lora_sft_2/ChatGLM2-6B/ChatGLM2_lora_sft_3_single_log.txt
+FORCE_TORCHRUN=1 llamafactory-cli train results/lora_sft_2/Qwen-7B/Qwen_lora_sft_1.yaml | tee results/lora_sft_2/Qwen-7B/Qwen_lora_sft_1_step500_log.txt
+FORCE_TORCHRUN=1 llamafactory-cli train results/lora_sft_2/Qwen-7B/Qwen_lora_sft_2.yaml | tee results/lora_sft_2/Qwen-7B/Qwen_lora_sft_2_step500_log.txt
+FORCE_TORCHRUN=1 llamafactory-cli train results/lora_sft_2/Qwen-7B/Qwen_lora_sft_3.yaml | tee results/lora_sft_2/Qwen-7B/Qwen_lora_sft_3_step500_log.txt
+CUDA_VISIBLE_DEVICES=0 llamafactory-cli train results/lora_sft_2/Qwen-7B/Qwen_lora_sft_1_single.yaml | tee results/lora_sft_2/Qwen-7B/Qwen_lora_sft_1_single_step500_log.txt
+CUDA_VISIBLE_DEVICES=0 llamafactory-cli train results/lora_sft_2/Qwen-7B/Qwen_lora_sft_2_single.yaml | tee results/lora_sft_2/Qwen-7B/Qwen_lora_sft_2_single_step500_log.txt
+CUDA_VISIBLE_DEVICES=0 llamafactory-cli train results/lora_sft_2/Qwen-7B/Qwen_lora_sft_3_single.yaml | tee results/lora_sft_2/Qwen-7B/Qwen_lora_sft_3_single_step500_log.txt

-FORCE_TORCHRUN=1 llamafactory-cli train results/lora_sft_2/Qwen-7B/Qwen_lora_sft_1.yaml | tee results/lora_sft_2/Qwen-7B/Qwen_lora_sft_1_log.txt
-FORCE_TORCHRUN=1 llamafactory-cli train results/lora_sft_2/Qwen-7B/Qwen_lora_sft_2.yaml | tee results/lora_sft_2/Qwen-7B/Qwen_lora_sft_2_log.txt
-FORCE_TORCHRUN=1 llamafactory-cli train results/lora_sft_2/Qwen-7B/Qwen_lora_sft_3.yaml | tee results/lora_sft_2/Qwen-7B/Qwen_lora_sft_3_log.txt
-CUDA_VISIBLE_DEVICES=0 llamafactory-cli train results/lora_sft_2/Qwen-7B/Qwen_lora_sft_1_single.yaml | tee results/lora_sft_2/Qwen-7B/Qwen_lora_sft_1_single_log.txt
-CUDA_VISIBLE_DEVICES=0 llamafactory-cli train results/lora_sft_2/Qwen-7B/Qwen_lora_sft_2_single.yaml | tee results/lora_sft_2/Qwen-7B/Qwen_lora_sft_2_single_log.txt
-CUDA_VISIBLE_DEVICES=0 llamafactory-cli train results/lora_sft_2/Qwen-7B/Qwen_lora_sft_3_single.yaml | tee results/lora_sft_2/Qwen-7B/Qwen_lora_sft_3_single_log.txt
-# FORCE_TORCHRUN=1 llamafactory-cli train results/inference/Qwen-7B/Qwen_predict_1.yaml | tee results/inference/Qwen-7B/Qwen_predict_1_log.txt
-# FORCE_TORCHRUN=1 llamafactory-cli train results/inference/Qwen-7B/Qwen_predict_2.yaml | tee results/inference/Qwen-7B/Qwen_predict_2_log.txt
-# FORCE_TORCHRUN=1 llamafactory-cli train results/inference/Qwen-7B/Qwen_predict_3.yaml | tee results/inference/Qwen-7B/Qwen_predict_3_log.txt
-# CUDA_VISIBLE_DEVICES=0 llamafactory-cli train results/inference/Qwen-7B/Qwen_predict_1_single.yaml | tee results/inference/Qwen-7B/Qwen_predict_1_single_log.txt
-# CUDA_VISIBLE_DEVICES=0 llamafactory-cli train results/inference/Qwen-7B/Qwen_predict_2_single.yaml | tee results/inference/Qwen-7B/Qwen_predict_2_single_log.txt
-# CUDA_VISIBLE_DEVICES=0 llamafactory-cli train results/inference/Qwen-7B/Qwen_predict_3_single.yaml | tee results/inference/Qwen-7B/Qwen_predict_3_single_log.txt
+# FORCE_TORCHRUN=1 llamafactory-cli train results/inference/Llama2-7B/llama2_predict_1.yaml | tee results/inference/Llama2-7B/llama2_predict_1_step500_log.txt
+# FORCE_TORCHRUN=1 llamafactory-cli train results/inference/Llama2-7B/llama2_predict_2.yaml | tee results/inference/Llama2-7B/llama2_predict_2_step500_log.txt
+# FORCE_TORCHRUN=1 llamafactory-cli train results/inference/Llama2-7B/llama2_predict_3.yaml | tee results/inference/Llama2-7B/llama2_predict_3_step500_log.txt
+# CUDA_VISIBLE_DEVICES=0 llamafactory-cli train results/inference/Llama2-7B/llama2_predict_1_single.yaml | tee results/inference/Llama2-7B/llama2_predict_1_single_step500_log.txt
+# CUDA_VISIBLE_DEVICES=0 llamafactory-cli train results/inference/Llama2-7B/llama2_predict_2_single.yaml | tee results/inference/Llama2-7B/llama2_predict_2_single_step500_log.txt
+# CUDA_VISIBLE_DEVICES=0 llamafactory-cli train results/inference/Llama2-7B/llama2_predict_3_single.yaml | tee results/inference/Llama2-7B/llama2_predict_3_single_step500_log.txt

-# FORCE_TORCHRUN=1 llamafactory-cli train results/inference/ChatGLM2-6B/ChatGLM2_predict_1.yaml | tee results/inference/ChatGLM2-6B/ChatGLM2_predict_1_log.txt
-# FORCE_TORCHRUN=1 llamafactory-cli train results/inference/ChatGLM2-6B/ChatGLM2_predict_2.yaml | tee results/inference/ChatGLM2-6B/ChatGLM2_predict_2_log.txt
-# FORCE_TORCHRUN=1 llamafactory-cli train results/inference/ChatGLM2-6B/ChatGLM2_predict_3.yaml | tee results/inference/ChatGLM2-6B/ChatGLM2_predict_3_log.txt
-# CUDA_VISIBLE_DEVICES=0 llamafactory-cli train results/inference/ChatGLM2-6B/ChatGLM2_predict_1_single.yaml | tee results/inference/ChatGLM2-6B/ChatGLM2_predict_1_single_log.txt
-# CUDA_VISIBLE_DEVICES=0 llamafactory-cli train results/inference/ChatGLM2-6B/ChatGLM2_predict_2_single.yaml | tee results/inference/ChatGLM2-6B/ChatGLM2_predict_2_single_log.txt
-# CUDA_VISIBLE_DEVICES=0 llamafactory-cli train results/inference/ChatGLM2-6B/ChatGLM2_predict_3_single.yaml | tee results/inference/ChatGLM2-6B/ChatGLM2_predict_3_single_log.txt
-# FORCE_TORCHRUN=1 llamafactory-cli train results/inference/Baichuan2-7B/Baichuan2_predict_1.yaml | tee results/inference/Baichuan2-7B/Baichuan2_predict_1_log.txt
-# FORCE_TORCHRUN=1 llamafactory-cli train results/inference/Baichuan2-7B/Baichuan2_predict_2.yaml | tee results/inference/Baichuan2-7B/Baichuan2_predict_2_log.txt
-# FORCE_TORCHRUN=1 llamafactory-cli train results/inference/Baichuan2-7B/Baichuan2_predict_3.yaml | tee results/inference/Baichuan2-7B/Baichuan2_predict_3_log.txt
-# CUDA_VISIBLE_DEVICES=0 llamafactory-cli train results/inference/Baichuan2-7B/Baichuan2_predict_1_single.yaml | tee results/inference/Baichuan2-7B/Baichuan2_predict_1_single_log.txt
-# CUDA_VISIBLE_DEVICES=0 llamafactory-cli train results/inference/Baichuan2-7B/Baichuan2_predict_2_single.yaml | tee results/inference/Baichuan2-7B/Baichuan2_predict_2_single_log.txt
-# CUDA_VISIBLE_DEVICES=0 llamafactory-cli train results/inference/Baichuan2-7B/Baichuan2_predict_3_single.yaml | tee results/inference/Baichuan2-7B/Baichuan2_predict_3_single_log.txt
+# FORCE_TORCHRUN=1 llamafactory-cli train results/inference/Qwen-7B/Qwen_predict_1.yaml | tee results/inference/Qwen-7B/Qwen_predict_1_step500_log.txt
+# FORCE_TORCHRUN=1 llamafactory-cli train results/inference/Qwen-7B/Qwen_predict_2.yaml | tee results/inference/Qwen-7B/Qwen_predict_2_step500_log.txt
+# FORCE_TORCHRUN=1 llamafactory-cli train results/inference/Qwen-7B/Qwen_predict_3.yaml | tee results/inference/Qwen-7B/Qwen_predict_3_step500_log.txt
+# CUDA_VISIBLE_DEVICES=0 llamafactory-cli train results/inference/Qwen-7B/Qwen_predict_1_single.yaml | tee results/inference/Qwen-7B/Qwen_predict_1_single_step500_log.txt
+# CUDA_VISIBLE_DEVICES=0 llamafactory-cli train results/inference/Qwen-7B/Qwen_predict_2_single.yaml | tee results/inference/Qwen-7B/Qwen_predict_2_single_step500_log.txt
+# CUDA_VISIBLE_DEVICES=0 llamafactory-cli train results/inference/Qwen-7B/Qwen_predict_3_single.yaml | tee results/inference/Qwen-7B/Qwen_predict_3_single_step500_log.txt
+
+# FORCE_TORCHRUN=1 llamafactory-cli train results/inference/ChatGLM2-6B/ChatGLM2_predict_1.yaml | tee results/inference/ChatGLM2-6B/ChatGLM2_predict_1_step500_log.txt
+# FORCE_TORCHRUN=1 llamafactory-cli train results/inference/ChatGLM2-6B/ChatGLM2_predict_2.yaml | tee results/inference/ChatGLM2-6B/ChatGLM2_predict_2_step500_log.txt
+# FORCE_TORCHRUN=1 llamafactory-cli train results/inference/ChatGLM2-6B/ChatGLM2_predict_3.yaml | tee results/inference/ChatGLM2-6B/ChatGLM2_predict_3_step500_log.txt
+# CUDA_VISIBLE_DEVICES=0 llamafactory-cli train results/inference/ChatGLM2-6B/ChatGLM2_predict_1_single.yaml | tee results/inference/ChatGLM2-6B/ChatGLM2_predict_1_single_step500_log.txt
+# CUDA_VISIBLE_DEVICES=0 llamafactory-cli train results/inference/ChatGLM2-6B/ChatGLM2_predict_2_single.yaml | tee results/inference/ChatGLM2-6B/ChatGLM2_predict_2_single_step500_log.txt
+# CUDA_VISIBLE_DEVICES=0 llamafactory-cli train results/inference/ChatGLM2-6B/ChatGLM2_predict_3_single.yaml | tee results/inference/ChatGLM2-6B/ChatGLM2_predict_3_single_step500_log.txt
+
+# FORCE_TORCHRUN=1 llamafactory-cli train results/inference/Baichuan2-7B/Baichuan2_predict_1.yaml | tee results/inference/Baichuan2-7B/Baichuan2_predict_1_step500_log.txt
+# FORCE_TORCHRUN=1 llamafactory-cli train results/inference/Baichuan2-7B/Baichuan2_predict_2.yaml | tee results/inference/Baichuan2-7B/Baichuan2_predict_2_step500_log.txt
+# FORCE_TORCHRUN=1 llamafactory-cli train results/inference/Baichuan2-7B/Baichuan2_predict_3.yaml | tee results/inference/Baichuan2-7B/Baichuan2_predict_3_step500_log.txt
+# CUDA_VISIBLE_DEVICES=0 llamafactory-cli train results/inference/Baichuan2-7B/Baichuan2_predict_1_single.yaml | tee results/inference/Baichuan2-7B/Baichuan2_predict_1_single_step500_log.txt
+# CUDA_VISIBLE_DEVICES=0 llamafactory-cli train results/inference/Baichuan2-7B/Baichuan2_predict_2_single.yaml | tee results/inference/Baichuan2-7B/Baichuan2_predict_2_single_step500_log.txt
+# CUDA_VISIBLE_DEVICES=0 llamafactory-cli train results/inference/Baichuan2-7B/Baichuan2_predict_3_single.yaml | tee results/inference/Baichuan2-7B/Baichuan2_predict_3_single_step500_log.txt
diff --git a/results/lora_sft_2/Baichuan2-7B/Baichuan2_lora_sft_1.yaml b/results/lora_sft_2/Baichuan2-7B/Baichuan2_lora_sft_1.yaml
index 06c432b0..f714d944 100644
--- a/results/lora_sft_2/Baichuan2-7B/Baichuan2_lora_sft_1.yaml
+++ b/results/lora_sft_2/Baichuan2-7B/Baichuan2_lora_sft_1.yaml
@@ -16,7 +16,7 @@ overwrite_cache: true
 preprocessing_num_workers: 16

 ### output
-output_dir: ./results/lora_sft_2/Baichuan2-7B/Baichuan2_lora_sft_1
+output_dir: ./results/lora_sft_2/Baichuan2-7B/Baichuan2_lora_sft_1_step500
 logging_steps: 3
 save_steps: 500
 plot_loss: true
@@ -31,7 +31,7 @@ lr_scheduler_type: cosine
 warmup_ratio: 0.1
 bf16: true
 ddp_timeout: 180000000
-max_steps: 1000
+max_steps: 500
 include_num_input_tokens_seen: true
 include_tokens_per_second: true
diff --git a/results/lora_sft_2/Baichuan2-7B/Baichuan2_lora_sft_1_single.yaml b/results/lora_sft_2/Baichuan2-7B/Baichuan2_lora_sft_1_single.yaml
index 96d0b75c..a3b42642 100644
--- a/results/lora_sft_2/Baichuan2-7B/Baichuan2_lora_sft_1_single.yaml
+++ b/results/lora_sft_2/Baichuan2-7B/Baichuan2_lora_sft_1_single.yaml
@@ -16,7 +16,7 @@ overwrite_cache: true
 preprocessing_num_workers: 16

 ### output
-output_dir: ./results/lora_sft_2/Baichuan2-7B/Baichuan2_lora_sft_1_single
+output_dir: ./results/lora_sft_2/Baichuan2-7B/Baichuan2_lora_sft_1_single_step500
 logging_steps: 3
 save_steps: 500
 plot_loss: true
@@ -31,7 +31,7 @@ lr_scheduler_type: cosine
 warmup_ratio: 0.1
 bf16: true
 ddp_timeout: 180000000
-max_steps: 1000
+max_steps: 500
 include_num_input_tokens_seen: true
 include_tokens_per_second: true
diff --git a/results/lora_sft_2/Baichuan2-7B/Baichuan2_lora_sft_2.yaml b/results/lora_sft_2/Baichuan2-7B/Baichuan2_lora_sft_2.yaml
index ceba3c34..e08567f8 100644
--- a/results/lora_sft_2/Baichuan2-7B/Baichuan2_lora_sft_2.yaml
+++ b/results/lora_sft_2/Baichuan2-7B/Baichuan2_lora_sft_2.yaml
@@ -16,7 +16,7 @@ overwrite_cache: true
 preprocessing_num_workers: 16

 ### output
-output_dir: ./results/lora_sft_2/Baichuan2-7B/Baichuan2_lora_sft_2
+output_dir: ./results/lora_sft_2/Baichuan2-7B/Baichuan2_lora_sft_2_step500
 logging_steps: 3
 save_steps: 500
 plot_loss: true
@@ -31,7 +31,7 @@ lr_scheduler_type: cosine
 warmup_ratio: 0.1
 bf16: true
 ddp_timeout: 180000000
-max_steps: 1000
+max_steps: 500
 include_num_input_tokens_seen: true
 include_tokens_per_second: true
diff --git a/results/lora_sft_2/Baichuan2-7B/Baichuan2_lora_sft_2_single.yaml b/results/lora_sft_2/Baichuan2-7B/Baichuan2_lora_sft_2_single.yaml
index af348d0a..f584e173 100644
--- a/results/lora_sft_2/Baichuan2-7B/Baichuan2_lora_sft_2_single.yaml
+++ b/results/lora_sft_2/Baichuan2-7B/Baichuan2_lora_sft_2_single.yaml
@@ -16,7 +16,7 @@ overwrite_cache: true
 preprocessing_num_workers: 16

 ### output
-output_dir: ./results/lora_sft_2/Baichuan2-7B/Baichuan2_lora_sft_2_single
+output_dir: ./results/lora_sft_2/Baichuan2-7B/Baichuan2_lora_sft_2_single_step500
 logging_steps: 3
 save_steps: 500
 plot_loss: true
@@ -31,7 +31,7 @@ lr_scheduler_type: cosine
 warmup_ratio: 0.1
 bf16: true
 ddp_timeout: 180000000
-max_steps: 1000
+max_steps: 500
 include_num_input_tokens_seen: true
 include_tokens_per_second: true
diff --git a/results/lora_sft_2/Baichuan2-7B/Baichuan2_lora_sft_3.yaml b/results/lora_sft_2/Baichuan2-7B/Baichuan2_lora_sft_3.yaml
index f1327f4d..33a8c42e 100644
--- a/results/lora_sft_2/Baichuan2-7B/Baichuan2_lora_sft_3.yaml
+++ b/results/lora_sft_2/Baichuan2-7B/Baichuan2_lora_sft_3.yaml
@@ -16,7 +16,7 @@ overwrite_cache: true
 preprocessing_num_workers: 16

 ### output
-output_dir: ./results/lora_sft_2/Baichuan2-7B/Baichuan2_lora_sft_3
+output_dir: ./results/lora_sft_2/Baichuan2-7B/Baichuan2_lora_sft_3_step500
 logging_steps: 3
 save_steps: 500
 plot_loss: true
@@ -31,7 +31,7 @@ lr_scheduler_type: cosine
 warmup_ratio: 0.1
 bf16: true
 ddp_timeout: 180000000
-max_steps: 1000
+max_steps: 500
 include_num_input_tokens_seen: true
 include_tokens_per_second: true
diff --git a/results/lora_sft_2/Baichuan2-7B/Baichuan2_lora_sft_3_single.yaml b/results/lora_sft_2/Baichuan2-7B/Baichuan2_lora_sft_3_single.yaml
index c303a4cf..bd5a5b03 100644
--- a/results/lora_sft_2/Baichuan2-7B/Baichuan2_lora_sft_3_single.yaml
+++ b/results/lora_sft_2/Baichuan2-7B/Baichuan2_lora_sft_3_single.yaml
@@ -16,7 +16,7 @@ overwrite_cache: true
 preprocessing_num_workers: 16

 ### output
-output_dir: ./results/lora_sft_2/Baichuan2-7B/Baichuan2_lora_sft_3_single
+output_dir: ./results/lora_sft_2/Baichuan2-7B/Baichuan2_lora_sft_3_single_step500
 logging_steps: 3
 save_steps: 500
 plot_loss: true
@@ -31,7 +31,7 @@ lr_scheduler_type: cosine
 warmup_ratio: 0.1
 bf16: true
 ddp_timeout: 180000000
-max_steps: 1000
+max_steps: 500
 include_num_input_tokens_seen: true
 include_tokens_per_second: true
diff --git a/results/lora_sft_2/ChatGLM2-6B/ChatGLM2_lora_sft_1.yaml b/results/lora_sft_2/ChatGLM2-6B/ChatGLM2_lora_sft_1.yaml
index abfe8a57..933a2bee 100644
--- a/results/lora_sft_2/ChatGLM2-6B/ChatGLM2_lora_sft_1.yaml
+++ b/results/lora_sft_2/ChatGLM2-6B/ChatGLM2_lora_sft_1.yaml
@@ -16,7 +16,7 @@ overwrite_cache: true
 preprocessing_num_workers: 16

 ### output
-output_dir: ./results/lora_sft_2/ChatGLM2-6B/ChatGLM2_lora_sft_1
+output_dir: ./results/lora_sft_2/ChatGLM2-6B/ChatGLM2_lora_sft_1_step500
 logging_steps: 3
 save_steps: 500
 plot_loss: true
@@ -31,7 +31,7 @@ lr_scheduler_type: cosine
 warmup_ratio: 0.1
 bf16: true
 ddp_timeout: 180000000
-max_steps: 1000
+max_steps: 500
 include_num_input_tokens_seen: true
 include_tokens_per_second: true
diff --git a/results/lora_sft_2/ChatGLM2-6B/ChatGLM2_lora_sft_1_single.yaml b/results/lora_sft_2/ChatGLM2-6B/ChatGLM2_lora_sft_1_single.yaml
index 7c052875..82c84c32 100644
--- a/results/lora_sft_2/ChatGLM2-6B/ChatGLM2_lora_sft_1_single.yaml
+++ b/results/lora_sft_2/ChatGLM2-6B/ChatGLM2_lora_sft_1_single.yaml
@@ -16,7 +16,7 @@ overwrite_cache: true
 preprocessing_num_workers: 16

 ### output
-output_dir: ./results/lora_sft_2/ChatGLM2-6B/ChatGLM2_lora_sft_1_single
+output_dir: ./results/lora_sft_2/ChatGLM2-6B/ChatGLM2_lora_sft_1_single_step500
 logging_steps: 3
 save_steps: 500
 plot_loss: true
@@ -31,7 +31,7 @@ lr_scheduler_type: cosine
 warmup_ratio: 0.1
 bf16: true
 ddp_timeout: 180000000
-max_steps: 1000
+max_steps: 500
 include_num_input_tokens_seen: true
 include_tokens_per_second: true
diff --git a/results/lora_sft_2/ChatGLM2-6B/ChatGLM2_lora_sft_2.yaml b/results/lora_sft_2/ChatGLM2-6B/ChatGLM2_lora_sft_2.yaml
index 4e0b0c80..c2ca1ade 100644
--- a/results/lora_sft_2/ChatGLM2-6B/ChatGLM2_lora_sft_2.yaml
+++ b/results/lora_sft_2/ChatGLM2-6B/ChatGLM2_lora_sft_2.yaml
@@ -16,7 +16,7 @@ overwrite_cache: true
 preprocessing_num_workers: 16

 ### output
-output_dir: ./results/lora_sft_2/ChatGLM2-6B/ChatGLM2_lora_sft_2
+output_dir: ./results/lora_sft_2/ChatGLM2-6B/ChatGLM2_lora_sft_2_step500
 logging_steps: 3
 save_steps: 500
 plot_loss: true
@@ -31,7 +31,7 @@ lr_scheduler_type: cosine
 warmup_ratio: 0.1
 bf16: true
 ddp_timeout: 180000000
-max_steps: 1000
+max_steps: 500
 include_num_input_tokens_seen: true
 include_tokens_per_second: true
diff --git a/results/lora_sft_2/ChatGLM2-6B/ChatGLM2_lora_sft_2_single.yaml b/results/lora_sft_2/ChatGLM2-6B/ChatGLM2_lora_sft_2_single.yaml
index 6dde6001..34f03bd4 100644
--- a/results/lora_sft_2/ChatGLM2-6B/ChatGLM2_lora_sft_2_single.yaml
+++ b/results/lora_sft_2/ChatGLM2-6B/ChatGLM2_lora_sft_2_single.yaml
@@ -16,7 +16,7 @@ overwrite_cache: true
 preprocessing_num_workers: 16

 ### output
-output_dir: ./results/lora_sft_2/ChatGLM2-6B/ChatGLM2_lora_sft_2_single
+output_dir: ./results/lora_sft_2/ChatGLM2-6B/ChatGLM2_lora_sft_2_single_step500
 logging_steps: 3
 save_steps: 500
 plot_loss: true
@@ -31,7 +31,7 @@ lr_scheduler_type: cosine
 warmup_ratio: 0.1
 bf16: true
 ddp_timeout: 180000000
-max_steps: 1000
+max_steps: 500
 include_num_input_tokens_seen: true
 include_tokens_per_second: true
diff --git a/results/lora_sft_2/ChatGLM2-6B/ChatGLM2_lora_sft_3.yaml b/results/lora_sft_2/ChatGLM2-6B/ChatGLM2_lora_sft_3.yaml
index bfc6fc60..fb33da23 100644
--- a/results/lora_sft_2/ChatGLM2-6B/ChatGLM2_lora_sft_3.yaml
+++ b/results/lora_sft_2/ChatGLM2-6B/ChatGLM2_lora_sft_3.yaml
@@ -16,7 +16,7 @@ overwrite_cache: true
 preprocessing_num_workers: 16

 ### output
-output_dir: ./results/lora_sft_2/ChatGLM2-6B/ChatGLM2_lora_sft_3
+output_dir: ./results/lora_sft_2/ChatGLM2-6B/ChatGLM2_lora_sft_3_step500
 logging_steps: 3
 save_steps: 500
 plot_loss: true
@@ -31,7 +31,7 @@ lr_scheduler_type: cosine
 warmup_ratio: 0.1
 bf16: true
 ddp_timeout: 180000000
-max_steps: 1000
+max_steps: 500
 include_num_input_tokens_seen: true
 include_tokens_per_second: true
diff --git a/results/lora_sft_2/ChatGLM2-6B/ChatGLM2_lora_sft_3_single.yaml b/results/lora_sft_2/ChatGLM2-6B/ChatGLM2_lora_sft_3_single.yaml
index ea9873ce..ca34e655 100644
--- a/results/lora_sft_2/ChatGLM2-6B/ChatGLM2_lora_sft_3_single.yaml
+++ b/results/lora_sft_2/ChatGLM2-6B/ChatGLM2_lora_sft_3_single.yaml
@@ -16,7 +16,7 @@ overwrite_cache: true
 preprocessing_num_workers: 16

 ### output
-output_dir: ./results/lora_sft_2/ChatGLM2-6B/ChatGLM2_lora_sft_3_single
+output_dir: ./results/lora_sft_2/ChatGLM2-6B/ChatGLM2_lora_sft_3_single_step500
 logging_steps: 3
 save_steps: 500
 plot_loss: true
@@ -31,7 +31,7 @@ lr_scheduler_type: cosine
 warmup_ratio: 0.1
 bf16: true
 ddp_timeout: 180000000
-max_steps: 1000
+max_steps: 500
 include_num_input_tokens_seen: true
 include_tokens_per_second: true
diff --git a/results/lora_sft_2/Llama2-7B/llama2_lora_sft_1.yaml b/results/lora_sft_2/Llama2-7B/llama2_lora_sft_1.yaml
index f5d93d50..e2917541 100644
--- a/results/lora_sft_2/Llama2-7B/llama2_lora_sft_1.yaml
+++ b/results/lora_sft_2/Llama2-7B/llama2_lora_sft_1.yaml
@@ -16,7 +16,7 @@ overwrite_cache: true
 preprocessing_num_workers: 16

 ### output
-output_dir: ./results/lora_sft_2/Llama2-7B/llama2_lora_sft_1
+output_dir: ./results/lora_sft_2/Llama2-7B/llama2_lora_sft_1_step500
 logging_steps: 3
 save_steps: 500
 plot_loss: true
@@ -31,7 +31,7 @@ lr_scheduler_type: cosine
 warmup_ratio: 0.1
 bf16: true
 ddp_timeout: 180000000
-max_steps: 1000
+max_steps: 500
 include_num_input_tokens_seen: true
 include_tokens_per_second: true
diff --git a/results/lora_sft_2/Llama2-7B/llama2_lora_sft_1_single.yaml b/results/lora_sft_2/Llama2-7B/llama2_lora_sft_1_single.yaml
index 6e5a9a55..257a4f41 100644
--- a/results/lora_sft_2/Llama2-7B/llama2_lora_sft_1_single.yaml
+++ b/results/lora_sft_2/Llama2-7B/llama2_lora_sft_1_single.yaml
@@ -16,7 +16,7 @@ overwrite_cache: true
 preprocessing_num_workers: 16

 ### output
-output_dir: ./results/lora_sft_2/Llama2-7B/llama2_lora_sft_1_single
+output_dir: ./results/lora_sft_2/Llama2-7B/llama2_lora_sft_1_single_step500
 logging_steps: 3
 save_steps: 500
 plot_loss: true
@@ -31,7 +31,7 @@ lr_scheduler_type: cosine
 warmup_ratio: 0.1
 bf16: true
 ddp_timeout: 180000000
-max_steps: 1000
+max_steps: 500
 include_num_input_tokens_seen: true
 include_tokens_per_second: true
diff --git a/results/lora_sft_2/Llama2-7B/llama2_lora_sft_2.yaml b/results/lora_sft_2/Llama2-7B/llama2_lora_sft_2.yaml
index 90929864..5649fb89 100644
--- a/results/lora_sft_2/Llama2-7B/llama2_lora_sft_2.yaml
+++ b/results/lora_sft_2/Llama2-7B/llama2_lora_sft_2.yaml
@@ -16,7 +16,7 @@ overwrite_cache: true
 preprocessing_num_workers: 16

 ### output
-output_dir: ./results/lora_sft_2/Llama2-7B/llama2_lora_sft_2
+output_dir: ./results/lora_sft_2/Llama2-7B/llama2_lora_sft_2_step500
 logging_steps: 3
 save_steps: 500
 plot_loss: true
@@ -31,7 +31,7 @@ lr_scheduler_type: cosine
 warmup_ratio: 0.1
 bf16: true
 ddp_timeout: 180000000
-max_steps: 1000
+max_steps: 500
 include_num_input_tokens_seen: true
 include_tokens_per_second: true
diff --git a/results/lora_sft_2/Llama2-7B/llama2_lora_sft_2_single.yaml b/results/lora_sft_2/Llama2-7B/llama2_lora_sft_2_single.yaml
index f85cfba3..603d8f9b 100644
--- a/results/lora_sft_2/Llama2-7B/llama2_lora_sft_2_single.yaml
+++ b/results/lora_sft_2/Llama2-7B/llama2_lora_sft_2_single.yaml
@@ -16,7 +16,7 @@ overwrite_cache: true
 preprocessing_num_workers: 16

 ### output
-output_dir: ./results/lora_sft_2/Llama2-7B/llama2_lora_sft_2_single
+output_dir: ./results/lora_sft_2/Llama2-7B/llama2_lora_sft_2_single_step500
 logging_steps: 3
 save_steps: 500
 plot_loss: true
@@ -31,7 +31,7 @@ lr_scheduler_type: cosine
 warmup_ratio: 0.1
 bf16: true
 ddp_timeout: 180000000
-max_steps: 1000
+max_steps: 500
 include_num_input_tokens_seen: true
 include_tokens_per_second: true
diff --git a/results/lora_sft_2/Llama2-7B/llama2_lora_sft_3.yaml b/results/lora_sft_2/Llama2-7B/llama2_lora_sft_3.yaml
index 943ccbdc..ed963adb 100644
--- a/results/lora_sft_2/Llama2-7B/llama2_lora_sft_3.yaml
+++ b/results/lora_sft_2/Llama2-7B/llama2_lora_sft_3.yaml
@@ -16,7 +16,7 @@ overwrite_cache: true
 preprocessing_num_workers: 16

 ### output
-output_dir: ./results/lora_sft_2/Llama2-7B/llama2_lora_sft_3
+output_dir: ./results/lora_sft_2/Llama2-7B/llama2_lora_sft_3_step500
 logging_steps: 3
 save_steps: 500
 plot_loss: true
@@ -31,7 +31,7 @@ lr_scheduler_type: cosine
 warmup_ratio: 0.1
 bf16: true
 ddp_timeout: 180000000
-max_steps: 1000
+max_steps: 500
 include_num_input_tokens_seen: true
 include_tokens_per_second: true
diff --git a/results/lora_sft_2/Llama2-7B/llama2_lora_sft_3_single.yaml b/results/lora_sft_2/Llama2-7B/llama2_lora_sft_3_single.yaml
index f174adc4..69f4af36 100644
--- a/results/lora_sft_2/Llama2-7B/llama2_lora_sft_3_single.yaml
+++ b/results/lora_sft_2/Llama2-7B/llama2_lora_sft_3_single.yaml
@@ -16,7 +16,7 @@ overwrite_cache: true
 preprocessing_num_workers: 16

 ### output
-output_dir: ./results/lora_sft_2/Llama2-7B/llama2_lora_sft_3_single
+output_dir: ./results/lora_sft_2/Llama2-7B/llama2_lora_sft_3_single_step500
 logging_steps: 3
 save_steps: 500
 plot_loss: true
@@ -31,7 +31,7 @@ lr_scheduler_type: cosine
 warmup_ratio: 0.1
 bf16: true
 ddp_timeout: 180000000
-max_steps: 1000
+max_steps: 500
 include_num_input_tokens_seen: true
 include_tokens_per_second: true
diff --git a/results/lora_sft_2/Qwen-7B/Qwen_lora_sft_1.yaml b/results/lora_sft_2/Qwen-7B/Qwen_lora_sft_1.yaml
index 3cf84cdc..761c30f3 100644
--- a/results/lora_sft_2/Qwen-7B/Qwen_lora_sft_1.yaml
+++ b/results/lora_sft_2/Qwen-7B/Qwen_lora_sft_1.yaml
@@ -16,7 +16,7 @@ overwrite_cache: true
 preprocessing_num_workers: 16

 ### output
-output_dir: ./results/lora_sft_2/Qwen-7B/Qwen_lora_sft_1
+output_dir: ./results/lora_sft_2/Qwen-7B/Qwen_lora_sft_1_step500
 logging_steps: 3
 save_steps: 500
 plot_loss: true
@@ -31,7 +31,7 @@ lr_scheduler_type: cosine
 warmup_ratio: 0.1
 bf16: true
 ddp_timeout: 180000000
-max_steps: 1000
+max_steps: 500
 include_num_input_tokens_seen: true
 include_tokens_per_second: true
diff --git a/results/lora_sft_2/Qwen-7B/Qwen_lora_sft_1_single.yaml b/results/lora_sft_2/Qwen-7B/Qwen_lora_sft_1_single.yaml
index 0a64c21d..6e3a8ac2 100644
--- a/results/lora_sft_2/Qwen-7B/Qwen_lora_sft_1_single.yaml
+++ b/results/lora_sft_2/Qwen-7B/Qwen_lora_sft_1_single.yaml
@@ -16,7 +16,7 @@ overwrite_cache: true
 preprocessing_num_workers: 16

 ### output
-output_dir: ./results/lora_sft_2/Qwen-7B/Qwen_lora_sft_1_single
+output_dir: ./results/lora_sft_2/Qwen-7B/Qwen_lora_sft_1_single_step500
 logging_steps: 3
 save_steps: 500
 plot_loss: true
@@ -31,7 +31,7 @@ lr_scheduler_type: cosine
 warmup_ratio: 0.1
 bf16: true
 ddp_timeout: 180000000
-max_steps: 1000
+max_steps: 500
 include_num_input_tokens_seen: true
 include_tokens_per_second: true
diff --git a/results/lora_sft_2/Qwen-7B/Qwen_lora_sft_2.yaml b/results/lora_sft_2/Qwen-7B/Qwen_lora_sft_2.yaml
index 6dd0f294..a377471b 100644
--- a/results/lora_sft_2/Qwen-7B/Qwen_lora_sft_2.yaml
+++ b/results/lora_sft_2/Qwen-7B/Qwen_lora_sft_2.yaml
@@ -16,7 +16,7 @@ overwrite_cache: true
 preprocessing_num_workers: 16

 ### output
-output_dir: ./results/lora_sft_2/Qwen-7B/Qwen_lora_sft_2
+output_dir: ./results/lora_sft_2/Qwen-7B/Qwen_lora_sft_2_step500
 logging_steps: 3
 save_steps: 500
 plot_loss: true
@@ -31,7 +31,7 @@ lr_scheduler_type: cosine
 warmup_ratio: 0.1
 bf16: true
 ddp_timeout: 180000000
-max_steps: 1000
+max_steps: 500
 include_num_input_tokens_seen: true
 include_tokens_per_second: true
diff --git a/results/lora_sft_2/Qwen-7B/Qwen_lora_sft_2_single.yaml b/results/lora_sft_2/Qwen-7B/Qwen_lora_sft_2_single.yaml
index fcf35648..b21fa7b5 100644
--- a/results/lora_sft_2/Qwen-7B/Qwen_lora_sft_2_single.yaml
+++ b/results/lora_sft_2/Qwen-7B/Qwen_lora_sft_2_single.yaml
@@ -16,7 +16,7 @@ overwrite_cache: true
 preprocessing_num_workers: 16

 ### output
-output_dir: ./results/lora_sft_2/Qwen-7B/Qwen_lora_sft_2_single
+output_dir: ./results/lora_sft_2/Qwen-7B/Qwen_lora_sft_2_single_step500
 logging_steps: 3
 save_steps: 500
 plot_loss: true
@@ -31,7 +31,7 @@ lr_scheduler_type: cosine
 warmup_ratio: 0.1
 bf16: true
 ddp_timeout: 180000000
-max_steps: 1000
+max_steps: 500
 include_num_input_tokens_seen: true
 include_tokens_per_second: true
diff --git a/results/lora_sft_2/Qwen-7B/Qwen_lora_sft_3.yaml b/results/lora_sft_2/Qwen-7B/Qwen_lora_sft_3.yaml
index 01abbdce..0f9b5be9 100644
--- a/results/lora_sft_2/Qwen-7B/Qwen_lora_sft_3.yaml
+++ b/results/lora_sft_2/Qwen-7B/Qwen_lora_sft_3.yaml
@@ -16,7 +16,7 @@ overwrite_cache: true
 preprocessing_num_workers: 16

 ### output
-output_dir: ./results/lora_sft_2/Qwen-7B/Qwen_lora_sft_3
+output_dir: ./results/lora_sft_2/Qwen-7B/Qwen_lora_sft_3_step500
 logging_steps: 3
 save_steps: 500
 plot_loss: true
@@ -31,7 +31,7 @@ lr_scheduler_type: cosine
 warmup_ratio: 0.1
 bf16: true
 ddp_timeout: 180000000
-max_steps: 1000
+max_steps: 500
 include_num_input_tokens_seen: true
 include_tokens_per_second: true
diff --git a/results/lora_sft_2/Qwen-7B/Qwen_lora_sft_3_single.yaml b/results/lora_sft_2/Qwen-7B/Qwen_lora_sft_3_single.yaml
index 49f1237f..bbd53d20 100644
--- a/results/lora_sft_2/Qwen-7B/Qwen_lora_sft_3_single.yaml
+++ b/results/lora_sft_2/Qwen-7B/Qwen_lora_sft_3_single.yaml
@@ -16,7 +16,7 @@ overwrite_cache: true
 preprocessing_num_workers: 16

 ### output
-output_dir: ./results/lora_sft_2/Qwen-7B/Qwen_lora_sft_3_single
+output_dir: ./results/lora_sft_2/Qwen-7B/Qwen_lora_sft_3_single_step500
 logging_steps: 3
 save_steps: 500
 plot_loss: true
@@ -31,7 +31,7 @@ lr_scheduler_type: cosine
 warmup_ratio: 0.1
 bf16: true
 ddp_timeout: 180000000
-max_steps: 1000
+max_steps: 500
 include_num_input_tokens_seen: true
 include_tokens_per_second: true
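Note: the batch_run.sh commands in this patch repeat one pattern per model and per config index. A minimal loop-based sketch that would reproduce the same training runs and step500 log names (not part of the patch; it assumes the results/lora_sft_2/<model>/<prefix>_lora_sft_<n>[_single].yaml layout used above, with the model/prefix pairs taken from the file names):

#!/usr/bin/env bash
# Sketch only: regenerate the batch_run.sh training runs above with loops.
set -euo pipefail

pairs=("Llama2-7B:llama2" "Baichuan2-7B:Baichuan2" "ChatGLM2-6B:ChatGLM2" "Qwen-7B:Qwen")

for pair in "${pairs[@]}"; do
  model="${pair%%:*}"   # directory name, e.g. Llama2-7B
  prefix="${pair##*:}"  # config prefix, e.g. llama2
  dir="results/lora_sft_2/${model}"
  for n in 1 2 3; do    # multi-GPU runs launched through torchrun
    FORCE_TORCHRUN=1 llamafactory-cli train "${dir}/${prefix}_lora_sft_${n}.yaml" \
      | tee "${dir}/${prefix}_lora_sft_${n}_step500_log.txt"
  done
  for n in 1 2 3; do    # single-GPU runs pinned to device 0
    CUDA_VISIBLE_DEVICES=0 llamafactory-cli train "${dir}/${prefix}_lora_sft_${n}_single.yaml" \
      | tee "${dir}/${prefix}_lora_sft_${n}_single_step500_log.txt"
  done
done

The commented-out predict blocks under results/inference/ follow the same shape and could be generated the same way by swapping lora_sft_2/lora_sft for inference/predict in the pattern.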