diff --git a/batch_run.sh b/batch_run.sh
index 26c125b0..c4b3e3ac 100644
--- a/batch_run.sh
+++ b/batch_run.sh
@@ -5,12 +5,12 @@ CUDA_VISIBLE_DEVICES=0 llamafactory-cli train results/lora_sft/Llama2-7B/llama2_
 CUDA_VISIBLE_DEVICES=0 llamafactory-cli train results/lora_sft/Llama2-7B/llama2_lora_sft_2_single.yaml | tee results/lora_sft/Llama2-7B/llama2_lora_sft_2_single.txt
 CUDA_VISIBLE_DEVICES=0 llamafactory-cli train results/lora_sft/Llama2-7B/llama2_lora_sft_3_single.yaml | tee results/lora_sft/Llama2-7B/llama2_lora_sft_3_single.txt
 
-FORCE_TORCHRUN=1 llamafactory-cli train results/inference/Llama2-7B/llama2_predict_1.yaml | tee results/inference/Llama2-7B/llama2_predict_1.txt
-FORCE_TORCHRUN=1 llamafactory-cli train results/inference/Llama2-7B/llama2_predict_2.yaml | tee results/inference/Llama2-7B/llama2_predict_2.txt
-FORCE_TORCHRUN=1 llamafactory-cli train results/inference/Llama2-7B/llama2_predict_3.yaml | tee results/inference/Llama2-7B/llama2_predict_3.txt
-CUDA_VISIBLE_DEVICES=0 llamafactory-cli train results/inference/Llama2-7B/llama2_predict_1_single.yaml | tee results/inference/Llama2-7B/llama2_predict_1_single.txt
-CUDA_VISIBLE_DEVICES=0 llamafactory-cli train results/inference/Llama2-7B/llama2_predict_2_single.yaml | tee results/inference/Llama2-7B/llama2_predict_2_single.txt
-CUDA_VISIBLE_DEVICES=0 llamafactory-cli train results/inference/Llama2-7B/llama2_predict_3_single.yaml | tee results/inference/Llama2-7B/llama2_predict_3_single.txt
+# FORCE_TORCHRUN=1 llamafactory-cli train results/inference/Llama2-7B/llama2_predict_1.yaml | tee results/inference/Llama2-7B/llama2_predict_1.txt
+# FORCE_TORCHRUN=1 llamafactory-cli train results/inference/Llama2-7B/llama2_predict_2.yaml | tee results/inference/Llama2-7B/llama2_predict_2.txt
+# FORCE_TORCHRUN=1 llamafactory-cli train results/inference/Llama2-7B/llama2_predict_3.yaml | tee results/inference/Llama2-7B/llama2_predict_3.txt
+# CUDA_VISIBLE_DEVICES=0 llamafactory-cli train results/inference/Llama2-7B/llama2_predict_1_single.yaml | tee results/inference/Llama2-7B/llama2_predict_1_single.txt
+# CUDA_VISIBLE_DEVICES=0 llamafactory-cli train results/inference/Llama2-7B/llama2_predict_2_single.yaml | tee results/inference/Llama2-7B/llama2_predict_2_single.txt
+# CUDA_VISIBLE_DEVICES=0 llamafactory-cli train results/inference/Llama2-7B/llama2_predict_3_single.yaml | tee results/inference/Llama2-7B/llama2_predict_3_single.txt
 
 # FORCE_TORCHRUN=1 llamafactory-cli train results/lora_sft/Baichuan2-7B/Baichuan2_lora_sft_1.yaml | tee results/lora_sft/Baichuan2-7B/Baichuan2_lora_sft_1.txt
 # FORCE_TORCHRUN=1 llamafactory-cli train results/lora_sft/Baichuan2-7B/Baichuan2_lora_sft_2.yaml | tee results/lora_sft/Baichuan2-7B/Baichuan2_lora_sft_2.txt
@@ -19,12 +19,12 @@ CUDA_VISIBLE_DEVICES=0 llamafactory-cli train results/inference/Llama2-7B/llama2
 # CUDA_VISIBLE_DEVICES=0 llamafactory-cli train results/lora_sft/Baichuan2-7B/Baichuan2_lora_sft_2_single.yaml | tee results/lora_sft/Baichuan2-7B/Baichuan2_lora_sft_2_single.txt
 # CUDA_VISIBLE_DEVICES=0 llamafactory-cli train results/lora_sft/Baichuan2-7B/Baichuan2_lora_sft_3_single.yaml | tee results/lora_sft/Baichuan2-7B/Baichuan2_lora_sft_3_single.txt
 
-# FORCE_TORCHRUN=1 llamafactory-cli train results/lora_sft/ChatGLM2-6B/ChatGLM2_lora_sft_1.yaml | tee results/lora_sft/ChatGLM2-6B/ChatGLM2_lora_sft_1.txt
-# FORCE_TORCHRUN=1 llamafactory-cli train results/lora_sft/ChatGLM2-6B/ChatGLM2_lora_sft_2.yaml | tee results/lora_sft/ChatGLM2-6B/ChatGLM2_lora_sft_2.txt
-# FORCE_TORCHRUN=1 llamafactory-cli train results/lora_sft/ChatGLM2-6B/ChatGLM2_lora_sft_3.yaml | tee results/lora_sft/ChatGLM2-6B/ChatGLM2_lora_sft_3.txt
-# CUDA_VISIBLE_DEVICES=0 llamafactory-cli train results/lora_sft/ChatGLM2-6B/ChatGLM2_lora_sft_1_single.yaml | tee results/lora_sft/ChatGLM2-6B/ChatGLM2_lora_sft_1_single.txt
-# CUDA_VISIBLE_DEVICES=0 llamafactory-cli train results/lora_sft/ChatGLM2-6B/ChatGLM2_lora_sft_2_single.yaml | tee results/lora_sft/ChatGLM2-6B/ChatGLM2_lora_sft_2_single.txt
-# CUDA_VISIBLE_DEVICES=0 llamafactory-cli train results/lora_sft/ChatGLM2-6B/ChatGLM2_lora_sft_3_single.yaml | tee results/lora_sft/ChatGLM2-6B/ChatGLM2_lora_sft_3_single.txt
+FORCE_TORCHRUN=1 llamafactory-cli train results/lora_sft/ChatGLM2-6B/ChatGLM2_lora_sft_1.yaml | tee results/lora_sft/ChatGLM2-6B/ChatGLM2_lora_sft_1.txt
+FORCE_TORCHRUN=1 llamafactory-cli train results/lora_sft/ChatGLM2-6B/ChatGLM2_lora_sft_2.yaml | tee results/lora_sft/ChatGLM2-6B/ChatGLM2_lora_sft_2.txt
+FORCE_TORCHRUN=1 llamafactory-cli train results/lora_sft/ChatGLM2-6B/ChatGLM2_lora_sft_3.yaml | tee results/lora_sft/ChatGLM2-6B/ChatGLM2_lora_sft_3.txt
+CUDA_VISIBLE_DEVICES=0 llamafactory-cli train results/lora_sft/ChatGLM2-6B/ChatGLM2_lora_sft_1_single.yaml | tee results/lora_sft/ChatGLM2-6B/ChatGLM2_lora_sft_1_single.txt
+CUDA_VISIBLE_DEVICES=0 llamafactory-cli train results/lora_sft/ChatGLM2-6B/ChatGLM2_lora_sft_2_single.yaml | tee results/lora_sft/ChatGLM2-6B/ChatGLM2_lora_sft_2_single.txt
+CUDA_VISIBLE_DEVICES=0 llamafactory-cli train results/lora_sft/ChatGLM2-6B/ChatGLM2_lora_sft_3_single.yaml | tee results/lora_sft/ChatGLM2-6B/ChatGLM2_lora_sft_3_single.txt
 
 # FORCE_TORCHRUN=1 llamafactory-cli train results/lora_sft/Qwen-7B/Qwen_lora_sft_1.yaml | tee results/lora_sft/Qwen-7B/Qwen_lora_sft_1.txt
 # FORCE_TORCHRUN=1 llamafactory-cli train results/lora_sft/Qwen-7B/Qwen_lora_sft_2.yaml | tee results/lora_sft/Qwen-7B/Qwen_lora_sft_2.txt
diff --git a/results/lora_sft/Baichuan2-7B/Baichuan2_lora_sft_1.yaml b/results/lora_sft/Baichuan2-7B/Baichuan2_lora_sft_1.yaml
index 43bd065c..f41903e8 100644
--- a/results/lora_sft/Baichuan2-7B/Baichuan2_lora_sft_1.yaml
+++ b/results/lora_sft/Baichuan2-7B/Baichuan2_lora_sft_1.yaml
@@ -32,6 +32,7 @@ warmup_ratio: 0.1
 bf16: true
 ddp_timeout: 180000000
 max_steps: 1000
+include_num_input_tokens_seen: true
 
 ### eval
 val_size: 0.1
diff --git a/results/lora_sft/Baichuan2-7B/Baichuan2_lora_sft_1_single.yaml b/results/lora_sft/Baichuan2-7B/Baichuan2_lora_sft_1_single.yaml
index c7f84993..c10638e7 100644
--- a/results/lora_sft/Baichuan2-7B/Baichuan2_lora_sft_1_single.yaml
+++ b/results/lora_sft/Baichuan2-7B/Baichuan2_lora_sft_1_single.yaml
@@ -32,6 +32,7 @@ warmup_ratio: 0.1
 bf16: true
 ddp_timeout: 180000000
 max_steps: 1000
+include_num_input_tokens_seen: true
 
 ### eval
 val_size: 0.1
diff --git a/results/lora_sft/Baichuan2-7B/Baichuan2_lora_sft_2.yaml b/results/lora_sft/Baichuan2-7B/Baichuan2_lora_sft_2.yaml
index 1aba0f98..019b3fe0 100644
--- a/results/lora_sft/Baichuan2-7B/Baichuan2_lora_sft_2.yaml
+++ b/results/lora_sft/Baichuan2-7B/Baichuan2_lora_sft_2.yaml
@@ -32,6 +32,7 @@ warmup_ratio: 0.1
 bf16: true
 ddp_timeout: 180000000
 max_steps: 1000
+include_num_input_tokens_seen: true
 
 ### eval
 val_size: 0.1
diff --git a/results/lora_sft/Baichuan2-7B/Baichuan2_lora_sft_2_single.yaml b/results/lora_sft/Baichuan2-7B/Baichuan2_lora_sft_2_single.yaml
index 180d03d3..c35cc67d 100644
--- a/results/lora_sft/Baichuan2-7B/Baichuan2_lora_sft_2_single.yaml
+++ b/results/lora_sft/Baichuan2-7B/Baichuan2_lora_sft_2_single.yaml
@@ -32,6 +32,7 @@ warmup_ratio: 0.1
 bf16: true
 ddp_timeout: 180000000
 max_steps: 1000
+include_num_input_tokens_seen: true
 
 ### eval
 val_size: 0.1
diff --git a/results/lora_sft/Baichuan2-7B/Baichuan2_lora_sft_3.yaml b/results/lora_sft/Baichuan2-7B/Baichuan2_lora_sft_3.yaml
index 02fc76d0..6305a915 100644
--- a/results/lora_sft/Baichuan2-7B/Baichuan2_lora_sft_3.yaml
+++ b/results/lora_sft/Baichuan2-7B/Baichuan2_lora_sft_3.yaml
@@ -32,6 +32,7 @@ warmup_ratio: 0.1
 bf16: true
 ddp_timeout: 180000000
 max_steps: 1000
+include_num_input_tokens_seen: true
 
 ### eval
 val_size: 0.1
diff --git a/results/lora_sft/Baichuan2-7B/Baichuan2_lora_sft_3_single.yaml b/results/lora_sft/Baichuan2-7B/Baichuan2_lora_sft_3_single.yaml
index f5910264..c556d448 100644
--- a/results/lora_sft/Baichuan2-7B/Baichuan2_lora_sft_3_single.yaml
+++ b/results/lora_sft/Baichuan2-7B/Baichuan2_lora_sft_3_single.yaml
@@ -32,6 +32,7 @@ warmup_ratio: 0.1
 bf16: true
 ddp_timeout: 180000000
 max_steps: 1000
+include_num_input_tokens_seen: true
 
 ### eval
 val_size: 0.1
diff --git a/results/lora_sft/ChatGLM2-6B/ChatGLM2_lora_sft_1.yaml b/results/lora_sft/ChatGLM2-6B/ChatGLM2_lora_sft_1.yaml
index e244940a..21119ed9 100644
--- a/results/lora_sft/ChatGLM2-6B/ChatGLM2_lora_sft_1.yaml
+++ b/results/lora_sft/ChatGLM2-6B/ChatGLM2_lora_sft_1.yaml
@@ -1,5 +1,5 @@
 ### model
-model_name_or_path: ZhipuAI/chatglm2-6b
+model_name_or_path: ~/data
 
 ### method
 stage: sft
@@ -32,6 +32,7 @@ warmup_ratio: 0.1
 bf16: true
 ddp_timeout: 180000000
 max_steps: 1000
+include_num_input_tokens_seen: true
 
 ### eval
 val_size: 0.1
diff --git a/results/lora_sft/ChatGLM2-6B/ChatGLM2_lora_sft_1_single.yaml b/results/lora_sft/ChatGLM2-6B/ChatGLM2_lora_sft_1_single.yaml
index e8663ccf..aaaee8b0 100644
--- a/results/lora_sft/ChatGLM2-6B/ChatGLM2_lora_sft_1_single.yaml
+++ b/results/lora_sft/ChatGLM2-6B/ChatGLM2_lora_sft_1_single.yaml
@@ -1,5 +1,5 @@
 ### model
-model_name_or_path: ZhipuAI/chatglm2-6b
+model_name_or_path: ~/data
 
 ### method
 stage: sft
@@ -32,6 +32,7 @@ warmup_ratio: 0.1
 bf16: true
 ddp_timeout: 180000000
 max_steps: 1000
+include_num_input_tokens_seen: true
 
 ### eval
 val_size: 0.1
diff --git a/results/lora_sft/ChatGLM2-6B/ChatGLM2_lora_sft_2.yaml b/results/lora_sft/ChatGLM2-6B/ChatGLM2_lora_sft_2.yaml
index 9a1e2c17..58bbe022 100644
--- a/results/lora_sft/ChatGLM2-6B/ChatGLM2_lora_sft_2.yaml
+++ b/results/lora_sft/ChatGLM2-6B/ChatGLM2_lora_sft_2.yaml
@@ -1,5 +1,5 @@
 ### model
-model_name_or_path: ZhipuAI/chatglm2-6b
+model_name_or_path: ~/data
 
 ### method
 stage: sft
@@ -32,6 +32,7 @@ warmup_ratio: 0.1
 bf16: true
 ddp_timeout: 180000000
 max_steps: 1000
+include_num_input_tokens_seen: true
 
 ### eval
 val_size: 0.1
diff --git a/results/lora_sft/ChatGLM2-6B/ChatGLM2_lora_sft_2_single.yaml b/results/lora_sft/ChatGLM2-6B/ChatGLM2_lora_sft_2_single.yaml
index 6772ff8e..c3b8316e 100644
--- a/results/lora_sft/ChatGLM2-6B/ChatGLM2_lora_sft_2_single.yaml
+++ b/results/lora_sft/ChatGLM2-6B/ChatGLM2_lora_sft_2_single.yaml
@@ -1,5 +1,5 @@
 ### model
-model_name_or_path: ZhipuAI/chatglm2-6b
+model_name_or_path: ~/data
 
 ### method
 stage: sft
@@ -32,6 +32,7 @@ warmup_ratio: 0.1
 bf16: true
 ddp_timeout: 180000000
 max_steps: 1000
+include_num_input_tokens_seen: true
 
 ### eval
 val_size: 0.1
diff --git a/results/lora_sft/ChatGLM2-6B/ChatGLM2_lora_sft_3.yaml b/results/lora_sft/ChatGLM2-6B/ChatGLM2_lora_sft_3.yaml
index 5e597ab9..ad156128 100644
--- a/results/lora_sft/ChatGLM2-6B/ChatGLM2_lora_sft_3.yaml
+++ b/results/lora_sft/ChatGLM2-6B/ChatGLM2_lora_sft_3.yaml
@@ -1,5 +1,5 @@
 ### model
-model_name_or_path: ZhipuAI/chatglm2-6b
+model_name_or_path: ~/data
 
 ### method
 stage: sft
@@ -32,6 +32,7 @@ warmup_ratio: 0.1
 bf16: true
 ddp_timeout: 180000000
 max_steps: 1000
+include_num_input_tokens_seen: true
 
 ### eval
 val_size: 0.1
diff --git a/results/lora_sft/ChatGLM2-6B/ChatGLM2_lora_sft_3_single.yaml b/results/lora_sft/ChatGLM2-6B/ChatGLM2_lora_sft_3_single.yaml
index 88ef5849..3ffb4c85 100644
--- a/results/lora_sft/ChatGLM2-6B/ChatGLM2_lora_sft_3_single.yaml
+++ b/results/lora_sft/ChatGLM2-6B/ChatGLM2_lora_sft_3_single.yaml
@@ -1,5 +1,5 @@
 ### model
-model_name_or_path: ZhipuAI/chatglm2-6b
+model_name_or_path: ~/data
 
 ### method
 stage: sft
@@ -32,6 +32,7 @@ warmup_ratio: 0.1
 bf16: true
 ddp_timeout: 180000000
 max_steps: 1000
+include_num_input_tokens_seen: true
 
 ### eval
 val_size: 0.1
diff --git a/results/lora_sft/Llama2-7B/llama2_lora_sft_1.yaml b/results/lora_sft/Llama2-7B/llama2_lora_sft_1.yaml
index aa27d4cc..af9ae3c0 100644
--- a/results/lora_sft/Llama2-7B/llama2_lora_sft_1.yaml
+++ b/results/lora_sft/Llama2-7B/llama2_lora_sft_1.yaml
@@ -31,7 +31,8 @@ lr_scheduler_type: cosine
 warmup_ratio: 0.1
 fp16: true
 ddp_timeout: 180000000
-max_steps: 500
+max_steps: 1000
+include_num_input_tokens_seen: true
 
 ### eval
 val_size: 0.1
diff --git a/results/lora_sft/Llama2-7B/llama2_lora_sft_1_single.yaml b/results/lora_sft/Llama2-7B/llama2_lora_sft_1_single.yaml
index 5a198b7d..f3cd2d6c 100644
--- a/results/lora_sft/Llama2-7B/llama2_lora_sft_1_single.yaml
+++ b/results/lora_sft/Llama2-7B/llama2_lora_sft_1_single.yaml
@@ -31,7 +31,8 @@ lr_scheduler_type: cosine
 warmup_ratio: 0.1
 fp16: true
 ddp_timeout: 180000000
-max_steps: 500
+max_steps: 1000
+include_num_input_tokens_seen: true
 
 ### eval
 val_size: 0.1
diff --git a/results/lora_sft/Llama2-7B/llama2_lora_sft_2.yaml b/results/lora_sft/Llama2-7B/llama2_lora_sft_2.yaml
index f9ada233..3f19b20e 100644
--- a/results/lora_sft/Llama2-7B/llama2_lora_sft_2.yaml
+++ b/results/lora_sft/Llama2-7B/llama2_lora_sft_2.yaml
@@ -31,7 +31,8 @@ lr_scheduler_type: cosine
 warmup_ratio: 0.1
 fp16: true
 ddp_timeout: 180000000
-max_steps: 500
+max_steps: 1000
+include_num_input_tokens_seen: true
 
 ### eval
 val_size: 0.1
diff --git a/results/lora_sft/Llama2-7B/llama2_lora_sft_2_single.yaml b/results/lora_sft/Llama2-7B/llama2_lora_sft_2_single.yaml
index a7ed1e76..f649e9ea 100644
--- a/results/lora_sft/Llama2-7B/llama2_lora_sft_2_single.yaml
+++ b/results/lora_sft/Llama2-7B/llama2_lora_sft_2_single.yaml
@@ -31,7 +31,8 @@ lr_scheduler_type: cosine
 warmup_ratio: 0.1
 fp16: true
 ddp_timeout: 180000000
-max_steps: 500
+max_steps: 1000
+include_num_input_tokens_seen: true
 
 ### eval
 val_size: 0.1
diff --git a/results/lora_sft/Llama2-7B/llama2_lora_sft_3.yaml b/results/lora_sft/Llama2-7B/llama2_lora_sft_3.yaml
index 23eaf7e1..08791e89 100644
--- a/results/lora_sft/Llama2-7B/llama2_lora_sft_3.yaml
+++ b/results/lora_sft/Llama2-7B/llama2_lora_sft_3.yaml
@@ -31,7 +31,8 @@ lr_scheduler_type: cosine
 warmup_ratio: 0.1
 fp16: true
 ddp_timeout: 180000000
-max_steps: 500
+max_steps: 1000
+include_num_input_tokens_seen: true
 
 ### eval
 val_size: 0.1
diff --git a/results/lora_sft/Llama2-7B/llama2_lora_sft_3_single.yaml b/results/lora_sft/Llama2-7B/llama2_lora_sft_3_single.yaml
index 068e816f..2d3aaea1 100644
--- a/results/lora_sft/Llama2-7B/llama2_lora_sft_3_single.yaml
+++ b/results/lora_sft/Llama2-7B/llama2_lora_sft_3_single.yaml
@@ -31,7 +31,8 @@ lr_scheduler_type: cosine
 warmup_ratio: 0.1
 fp16: true
 ddp_timeout: 180000000
-max_steps: 500
+max_steps: 1000
+include_num_input_tokens_seen: true
 
 ### eval
 val_size: 0.1
diff --git a/results/lora_sft/Qwen-7B/Qwen_lora_sft_1.yaml b/results/lora_sft/Qwen-7B/Qwen_lora_sft_1.yaml
index 7b531500..c8d6820f 100644
--- a/results/lora_sft/Qwen-7B/Qwen_lora_sft_1.yaml
+++ b/results/lora_sft/Qwen-7B/Qwen_lora_sft_1.yaml
@@ -32,6 +32,7 @@ warmup_ratio: 0.1
 bf16: true
 ddp_timeout: 180000000
 max_steps: 1000
+include_num_input_tokens_seen: true
 
 ### eval
 val_size: 0.1
diff --git a/results/lora_sft/Qwen-7B/Qwen_lora_sft_1_single.yaml b/results/lora_sft/Qwen-7B/Qwen_lora_sft_1_single.yaml
index f62de109..add04a57 100644
--- a/results/lora_sft/Qwen-7B/Qwen_lora_sft_1_single.yaml
+++ b/results/lora_sft/Qwen-7B/Qwen_lora_sft_1_single.yaml
@@ -32,6 +32,7 @@ warmup_ratio: 0.1
 bf16: true
 ddp_timeout: 180000000
 max_steps: 1000
+include_num_input_tokens_seen: true
 
 ### eval
 val_size: 0.1
diff --git a/results/lora_sft/Qwen-7B/Qwen_lora_sft_2.yaml b/results/lora_sft/Qwen-7B/Qwen_lora_sft_2.yaml
index f153ad28..1196ca97 100644
--- a/results/lora_sft/Qwen-7B/Qwen_lora_sft_2.yaml
+++ b/results/lora_sft/Qwen-7B/Qwen_lora_sft_2.yaml
@@ -32,6 +32,7 @@ warmup_ratio: 0.1
 bf16: true
 ddp_timeout: 180000000
 max_steps: 1000
+include_num_input_tokens_seen: true
 
 ### eval
 val_size: 0.1
diff --git a/results/lora_sft/Qwen-7B/Qwen_lora_sft_2_single.yaml b/results/lora_sft/Qwen-7B/Qwen_lora_sft_2_single.yaml
index 1d4b436e..7f1aeac4 100644
--- a/results/lora_sft/Qwen-7B/Qwen_lora_sft_2_single.yaml
+++ b/results/lora_sft/Qwen-7B/Qwen_lora_sft_2_single.yaml
@@ -32,6 +32,7 @@ warmup_ratio: 0.1
 bf16: true
 ddp_timeout: 180000000
 max_steps: 1000
+include_num_input_tokens_seen: true
 
 ### eval
 val_size: 0.1
diff --git a/results/lora_sft/Qwen-7B/Qwen_lora_sft_3.yaml b/results/lora_sft/Qwen-7B/Qwen_lora_sft_3.yaml
index 02e978b0..96c32ac4 100644
--- a/results/lora_sft/Qwen-7B/Qwen_lora_sft_3.yaml
+++ b/results/lora_sft/Qwen-7B/Qwen_lora_sft_3.yaml
@@ -32,6 +32,7 @@ warmup_ratio: 0.1
 bf16: true
 ddp_timeout: 180000000
 max_steps: 1000
+include_num_input_tokens_seen: true
 
 ### eval
 val_size: 0.1
diff --git a/results/lora_sft/Qwen-7B/Qwen_lora_sft_3_single.yaml b/results/lora_sft/Qwen-7B/Qwen_lora_sft_3_single.yaml
index 2ac88686..da2464d6 100644
--- a/results/lora_sft/Qwen-7B/Qwen_lora_sft_3_single.yaml
+++ b/results/lora_sft/Qwen-7B/Qwen_lora_sft_3_single.yaml
@@ -32,6 +32,7 @@ warmup_ratio: 0.1
 bf16: true
 ddp_timeout: 180000000
 max_steps: 1000
+include_num_input_tokens_seen: true
 
 ### eval
 val_size: 0.1