diff --git a/.gitignore b/.gitignore index 7a19c39d..9225a23b 100644 --- a/.gitignore +++ b/.gitignore @@ -166,5 +166,5 @@ cache/ config/ output/ wandb/ - +results/lora_sft diff --git a/results/lora_sft_2/Baichuan2-7B/Baichuan2_lora_sft_1.yaml b/results/lora_sft_2/Baichuan2-7B/Baichuan2_lora_sft_1.yaml index bb89b1f7..06c432b0 100644 --- a/results/lora_sft_2/Baichuan2-7B/Baichuan2_lora_sft_1.yaml +++ b/results/lora_sft_2/Baichuan2-7B/Baichuan2_lora_sft_1.yaml @@ -16,7 +16,7 @@ overwrite_cache: true preprocessing_num_workers: 16 ### output -output_dir: ./results/lora_sft/Baichuan2-7B/Baichuan2_lora_sft_1 +output_dir: ./results/lora_sft_2/Baichuan2-7B/Baichuan2_lora_sft_1 logging_steps: 3 save_steps: 500 plot_loss: true diff --git a/results/lora_sft_2/Baichuan2-7B/Baichuan2_lora_sft_1_single.yaml b/results/lora_sft_2/Baichuan2-7B/Baichuan2_lora_sft_1_single.yaml index 70b697f1..96d0b75c 100644 --- a/results/lora_sft_2/Baichuan2-7B/Baichuan2_lora_sft_1_single.yaml +++ b/results/lora_sft_2/Baichuan2-7B/Baichuan2_lora_sft_1_single.yaml @@ -16,7 +16,7 @@ overwrite_cache: true preprocessing_num_workers: 16 ### output -output_dir: ./results/lora_sft/Baichuan2-7B/Baichuan2_lora_sft_1_single +output_dir: ./results/lora_sft_2/Baichuan2-7B/Baichuan2_lora_sft_1_single logging_steps: 3 save_steps: 500 plot_loss: true diff --git a/results/lora_sft_2/Baichuan2-7B/Baichuan2_lora_sft_2.yaml b/results/lora_sft_2/Baichuan2-7B/Baichuan2_lora_sft_2.yaml index af14b4df..ceba3c34 100644 --- a/results/lora_sft_2/Baichuan2-7B/Baichuan2_lora_sft_2.yaml +++ b/results/lora_sft_2/Baichuan2-7B/Baichuan2_lora_sft_2.yaml @@ -16,7 +16,7 @@ overwrite_cache: true preprocessing_num_workers: 16 ### output -output_dir: ./results/lora_sft/Baichuan2-7B/Baichuan2_lora_sft_2 +output_dir: ./results/lora_sft_2/Baichuan2-7B/Baichuan2_lora_sft_2 logging_steps: 3 save_steps: 500 plot_loss: true diff --git a/results/lora_sft_2/Baichuan2-7B/Baichuan2_lora_sft_2_single.yaml b/results/lora_sft_2/Baichuan2-7B/Baichuan2_lora_sft_2_single.yaml index de7b4a11..af348d0a 100644 --- a/results/lora_sft_2/Baichuan2-7B/Baichuan2_lora_sft_2_single.yaml +++ b/results/lora_sft_2/Baichuan2-7B/Baichuan2_lora_sft_2_single.yaml @@ -16,7 +16,7 @@ overwrite_cache: true preprocessing_num_workers: 16 ### output -output_dir: ./results/lora_sft/Baichuan2-7B/Baichuan2_lora_sft_2_single +output_dir: ./results/lora_sft_2/Baichuan2-7B/Baichuan2_lora_sft_2_single logging_steps: 3 save_steps: 500 plot_loss: true diff --git a/results/lora_sft_2/Baichuan2-7B/Baichuan2_lora_sft_3.yaml b/results/lora_sft_2/Baichuan2-7B/Baichuan2_lora_sft_3.yaml index 6704b2bc..f1327f4d 100644 --- a/results/lora_sft_2/Baichuan2-7B/Baichuan2_lora_sft_3.yaml +++ b/results/lora_sft_2/Baichuan2-7B/Baichuan2_lora_sft_3.yaml @@ -16,7 +16,7 @@ overwrite_cache: true preprocessing_num_workers: 16 ### output -output_dir: ./results/lora_sft/Baichuan2-7B/Baichuan2_lora_sft_3 +output_dir: ./results/lora_sft_2/Baichuan2-7B/Baichuan2_lora_sft_3 logging_steps: 3 save_steps: 500 plot_loss: true diff --git a/results/lora_sft_2/Baichuan2-7B/Baichuan2_lora_sft_3_single.yaml b/results/lora_sft_2/Baichuan2-7B/Baichuan2_lora_sft_3_single.yaml index 785bd604..c303a4cf 100644 --- a/results/lora_sft_2/Baichuan2-7B/Baichuan2_lora_sft_3_single.yaml +++ b/results/lora_sft_2/Baichuan2-7B/Baichuan2_lora_sft_3_single.yaml @@ -16,7 +16,7 @@ overwrite_cache: true preprocessing_num_workers: 16 ### output -output_dir: ./results/lora_sft/Baichuan2-7B/Baichuan2_lora_sft_3_single +output_dir: ./results/lora_sft_2/Baichuan2-7B/Baichuan2_lora_sft_3_single logging_steps: 3 save_steps: 500 plot_loss: true diff --git a/results/lora_sft_2/ChatGLM2-6B/ChatGLM2_lora_sft_1.yaml b/results/lora_sft_2/ChatGLM2-6B/ChatGLM2_lora_sft_1.yaml index cd08d3a6..abfe8a57 100644 --- a/results/lora_sft_2/ChatGLM2-6B/ChatGLM2_lora_sft_1.yaml +++ b/results/lora_sft_2/ChatGLM2-6B/ChatGLM2_lora_sft_1.yaml @@ -16,7 +16,7 @@ overwrite_cache: true preprocessing_num_workers: 16 ### output -output_dir: ./results/lora_sft/ChatGLM2-6B/ChatGLM2_lora_sft_1 +output_dir: ./results/lora_sft_2/ChatGLM2-6B/ChatGLM2_lora_sft_1 logging_steps: 3 save_steps: 500 plot_loss: true diff --git a/results/lora_sft_2/ChatGLM2-6B/ChatGLM2_lora_sft_1_single.yaml b/results/lora_sft_2/ChatGLM2-6B/ChatGLM2_lora_sft_1_single.yaml index 331c4aaf..7c052875 100644 --- a/results/lora_sft_2/ChatGLM2-6B/ChatGLM2_lora_sft_1_single.yaml +++ b/results/lora_sft_2/ChatGLM2-6B/ChatGLM2_lora_sft_1_single.yaml @@ -16,7 +16,7 @@ overwrite_cache: true preprocessing_num_workers: 16 ### output -output_dir: ./results/lora_sft/ChatGLM2-6B/ChatGLM2_lora_sft_1_single +output_dir: ./results/lora_sft_2/ChatGLM2-6B/ChatGLM2_lora_sft_1_single logging_steps: 3 save_steps: 500 plot_loss: true diff --git a/results/lora_sft_2/ChatGLM2-6B/ChatGLM2_lora_sft_2.yaml b/results/lora_sft_2/ChatGLM2-6B/ChatGLM2_lora_sft_2.yaml index f86be996..4e0b0c80 100644 --- a/results/lora_sft_2/ChatGLM2-6B/ChatGLM2_lora_sft_2.yaml +++ b/results/lora_sft_2/ChatGLM2-6B/ChatGLM2_lora_sft_2.yaml @@ -16,7 +16,7 @@ overwrite_cache: true preprocessing_num_workers: 16 ### output -output_dir: ./results/lora_sft/ChatGLM2-6B/ChatGLM2_lora_sft_2 +output_dir: ./results/lora_sft_2/ChatGLM2-6B/ChatGLM2_lora_sft_2 logging_steps: 3 save_steps: 500 plot_loss: true diff --git a/results/lora_sft_2/ChatGLM2-6B/ChatGLM2_lora_sft_2_single.yaml b/results/lora_sft_2/ChatGLM2-6B/ChatGLM2_lora_sft_2_single.yaml index 376ea958..6dde6001 100644 --- a/results/lora_sft_2/ChatGLM2-6B/ChatGLM2_lora_sft_2_single.yaml +++ b/results/lora_sft_2/ChatGLM2-6B/ChatGLM2_lora_sft_2_single.yaml @@ -16,7 +16,7 @@ overwrite_cache: true preprocessing_num_workers: 16 ### output -output_dir: ./results/lora_sft/ChatGLM2-6B/ChatGLM2_lora_sft_2_single +output_dir: ./results/lora_sft_2/ChatGLM2-6B/ChatGLM2_lora_sft_2_single logging_steps: 3 save_steps: 500 plot_loss: true diff --git a/results/lora_sft_2/ChatGLM2-6B/ChatGLM2_lora_sft_3.yaml b/results/lora_sft_2/ChatGLM2-6B/ChatGLM2_lora_sft_3.yaml index f6d13652..bfc6fc60 100644 --- a/results/lora_sft_2/ChatGLM2-6B/ChatGLM2_lora_sft_3.yaml +++ b/results/lora_sft_2/ChatGLM2-6B/ChatGLM2_lora_sft_3.yaml @@ -16,7 +16,7 @@ overwrite_cache: true preprocessing_num_workers: 16 ### output -output_dir: ./results/lora_sft/ChatGLM2-6B/ChatGLM2_lora_sft_3 +output_dir: ./results/lora_sft_2/ChatGLM2-6B/ChatGLM2_lora_sft_3 logging_steps: 3 save_steps: 500 plot_loss: true diff --git a/results/lora_sft_2/ChatGLM2-6B/ChatGLM2_lora_sft_3_single.yaml b/results/lora_sft_2/ChatGLM2-6B/ChatGLM2_lora_sft_3_single.yaml index 4ed25d4d..ea9873ce 100644 --- a/results/lora_sft_2/ChatGLM2-6B/ChatGLM2_lora_sft_3_single.yaml +++ b/results/lora_sft_2/ChatGLM2-6B/ChatGLM2_lora_sft_3_single.yaml @@ -16,7 +16,7 @@ overwrite_cache: true preprocessing_num_workers: 16 ### output -output_dir: ./results/lora_sft/ChatGLM2-6B/ChatGLM2_lora_sft_3_single +output_dir: ./results/lora_sft_2/ChatGLM2-6B/ChatGLM2_lora_sft_3_single logging_steps: 3 save_steps: 500 plot_loss: true diff --git a/results/lora_sft_2/Llama2-7B/llama2_lora_sft_1.yaml b/results/lora_sft_2/Llama2-7B/llama2_lora_sft_1.yaml index 62bfe478..9d965f88 100644 --- a/results/lora_sft_2/Llama2-7B/llama2_lora_sft_1.yaml +++ b/results/lora_sft_2/Llama2-7B/llama2_lora_sft_1.yaml @@ -16,7 +16,7 @@ overwrite_cache: true preprocessing_num_workers: 16 ### output -output_dir: ./results/lora_sft/Llama2-7B/llama2_lora_sft_1 +output_dir: ./results/lora_sft_2/Llama2-7B/llama2_lora_sft_1 logging_steps: 3 save_steps: 500 plot_loss: true diff --git a/results/lora_sft_2/Llama2-7B/llama2_lora_sft_1_single.yaml b/results/lora_sft_2/Llama2-7B/llama2_lora_sft_1_single.yaml index 4977b9d7..5633cd3f 100644 --- a/results/lora_sft_2/Llama2-7B/llama2_lora_sft_1_single.yaml +++ b/results/lora_sft_2/Llama2-7B/llama2_lora_sft_1_single.yaml @@ -16,7 +16,7 @@ overwrite_cache: true preprocessing_num_workers: 16 ### output -output_dir: ./results/lora_sft/Llama2-7B/llama2_lora_sft_1_single +output_dir: ./results/lora_sft_2/Llama2-7B/llama2_lora_sft_1_single logging_steps: 3 save_steps: 500 plot_loss: true diff --git a/results/lora_sft_2/Llama2-7B/llama2_lora_sft_2.yaml b/results/lora_sft_2/Llama2-7B/llama2_lora_sft_2.yaml index fe46a60c..e4a05d0c 100644 --- a/results/lora_sft_2/Llama2-7B/llama2_lora_sft_2.yaml +++ b/results/lora_sft_2/Llama2-7B/llama2_lora_sft_2.yaml @@ -16,7 +16,7 @@ overwrite_cache: true preprocessing_num_workers: 16 ### output -output_dir: ./results/lora_sft/Llama2-7B/llama2_lora_sft_2 +output_dir: ./results/lora_sft_2/Llama2-7B/llama2_lora_sft_2 logging_steps: 3 save_steps: 500 plot_loss: true diff --git a/results/lora_sft_2/Llama2-7B/llama2_lora_sft_2_single.yaml b/results/lora_sft_2/Llama2-7B/llama2_lora_sft_2_single.yaml index ce394af8..1e839cc3 100644 --- a/results/lora_sft_2/Llama2-7B/llama2_lora_sft_2_single.yaml +++ b/results/lora_sft_2/Llama2-7B/llama2_lora_sft_2_single.yaml @@ -16,7 +16,7 @@ overwrite_cache: true preprocessing_num_workers: 16 ### output -output_dir: ./results/lora_sft/Llama2-7B/llama2_lora_sft_2_single +output_dir: ./results/lora_sft_2/Llama2-7B/llama2_lora_sft_2_single logging_steps: 3 save_steps: 500 plot_loss: true diff --git a/results/lora_sft_2/Llama2-7B/llama2_lora_sft_3.yaml b/results/lora_sft_2/Llama2-7B/llama2_lora_sft_3.yaml index 7fce6daf..042e210d 100644 --- a/results/lora_sft_2/Llama2-7B/llama2_lora_sft_3.yaml +++ b/results/lora_sft_2/Llama2-7B/llama2_lora_sft_3.yaml @@ -16,7 +16,7 @@ overwrite_cache: true preprocessing_num_workers: 16 ### output -output_dir: ./results/lora_sft/Llama2-7B/llama2_lora_sft_3 +output_dir: ./results/lora_sft_2/Llama2-7B/llama2_lora_sft_3 logging_steps: 3 save_steps: 500 plot_loss: true diff --git a/results/lora_sft_2/Llama2-7B/llama2_lora_sft_3_single.yaml b/results/lora_sft_2/Llama2-7B/llama2_lora_sft_3_single.yaml index ab0695ea..bf5b0f16 100644 --- a/results/lora_sft_2/Llama2-7B/llama2_lora_sft_3_single.yaml +++ b/results/lora_sft_2/Llama2-7B/llama2_lora_sft_3_single.yaml @@ -16,7 +16,7 @@ overwrite_cache: true preprocessing_num_workers: 16 ### output -output_dir: ./results/lora_sft/Llama2-7B/llama2_lora_sft_3_single +output_dir: ./results/lora_sft_2/Llama2-7B/llama2_lora_sft_3_single logging_steps: 3 save_steps: 500 plot_loss: true diff --git a/results/lora_sft_2/Qwen-7B/Qwen_lora_sft_1.yaml b/results/lora_sft_2/Qwen-7B/Qwen_lora_sft_1.yaml index b9991fc6..3cf84cdc 100644 --- a/results/lora_sft_2/Qwen-7B/Qwen_lora_sft_1.yaml +++ b/results/lora_sft_2/Qwen-7B/Qwen_lora_sft_1.yaml @@ -16,7 +16,7 @@ overwrite_cache: true preprocessing_num_workers: 16 ### output -output_dir: ./results/lora_sft/Qwen-7B/Qwen_lora_sft_1 +output_dir: ./results/lora_sft_2/Qwen-7B/Qwen_lora_sft_1 logging_steps: 3 save_steps: 500 plot_loss: true diff --git a/results/lora_sft_2/Qwen-7B/Qwen_lora_sft_1_single.yaml b/results/lora_sft_2/Qwen-7B/Qwen_lora_sft_1_single.yaml index 359d1fad..0a64c21d 100644 --- a/results/lora_sft_2/Qwen-7B/Qwen_lora_sft_1_single.yaml +++ b/results/lora_sft_2/Qwen-7B/Qwen_lora_sft_1_single.yaml @@ -16,7 +16,7 @@ overwrite_cache: true preprocessing_num_workers: 16 ### output -output_dir: ./results/lora_sft/Qwen-7B/Qwen_lora_sft_1_single +output_dir: ./results/lora_sft_2/Qwen-7B/Qwen_lora_sft_1_single logging_steps: 3 save_steps: 500 plot_loss: true diff --git a/results/lora_sft_2/Qwen-7B/Qwen_lora_sft_2.yaml b/results/lora_sft_2/Qwen-7B/Qwen_lora_sft_2.yaml index b0b0b3d9..6dd0f294 100644 --- a/results/lora_sft_2/Qwen-7B/Qwen_lora_sft_2.yaml +++ b/results/lora_sft_2/Qwen-7B/Qwen_lora_sft_2.yaml @@ -16,7 +16,7 @@ overwrite_cache: true preprocessing_num_workers: 16 ### output -output_dir: ./results/lora_sft/Qwen-7B/Qwen_lora_sft_2 +output_dir: ./results/lora_sft_2/Qwen-7B/Qwen_lora_sft_2 logging_steps: 3 save_steps: 500 plot_loss: true diff --git a/results/lora_sft_2/Qwen-7B/Qwen_lora_sft_2_single.yaml b/results/lora_sft_2/Qwen-7B/Qwen_lora_sft_2_single.yaml index 7004b44a..fcf35648 100644 --- a/results/lora_sft_2/Qwen-7B/Qwen_lora_sft_2_single.yaml +++ b/results/lora_sft_2/Qwen-7B/Qwen_lora_sft_2_single.yaml @@ -16,7 +16,7 @@ overwrite_cache: true preprocessing_num_workers: 16 ### output -output_dir: ./results/lora_sft/Qwen-7B/Qwen_lora_sft_2_single +output_dir: ./results/lora_sft_2/Qwen-7B/Qwen_lora_sft_2_single logging_steps: 3 save_steps: 500 plot_loss: true diff --git a/results/lora_sft_2/Qwen-7B/Qwen_lora_sft_3.yaml b/results/lora_sft_2/Qwen-7B/Qwen_lora_sft_3.yaml index 6586b9e0..01abbdce 100644 --- a/results/lora_sft_2/Qwen-7B/Qwen_lora_sft_3.yaml +++ b/results/lora_sft_2/Qwen-7B/Qwen_lora_sft_3.yaml @@ -16,7 +16,7 @@ overwrite_cache: true preprocessing_num_workers: 16 ### output -output_dir: ./results/lora_sft/Qwen-7B/Qwen_lora_sft_3 +output_dir: ./results/lora_sft_2/Qwen-7B/Qwen_lora_sft_3 logging_steps: 3 save_steps: 500 plot_loss: true diff --git a/results/lora_sft_2/Qwen-7B/Qwen_lora_sft_3_single.yaml b/results/lora_sft_2/Qwen-7B/Qwen_lora_sft_3_single.yaml index 469971eb..49f1237f 100644 --- a/results/lora_sft_2/Qwen-7B/Qwen_lora_sft_3_single.yaml +++ b/results/lora_sft_2/Qwen-7B/Qwen_lora_sft_3_single.yaml @@ -16,7 +16,7 @@ overwrite_cache: true preprocessing_num_workers: 16 ### output -output_dir: ./results/lora_sft/Qwen-7B/Qwen_lora_sft_3_single +output_dir: ./results/lora_sft_2/Qwen-7B/Qwen_lora_sft_3_single logging_steps: 3 save_steps: 500 plot_loss: true