diff --git a/results/lora_sft_template.yaml b/results/lora_sft_template.yaml index cccd8baa..9a4411e4 100644 --- a/results/lora_sft_template.yaml +++ b/results/lora_sft_template.yaml @@ -25,7 +25,7 @@ overwrite_output_dir: true ### train per_device_train_batch_size: 2 gradient_accumulation_steps: 8 -learning_rate: 1.0e-5 +learning_rate: 1.0e-4 num_train_epochs: 10.0 lr_scheduler_type: cosine warmup_ratio: 0.1 diff --git a/run_once.sh b/run_once.sh index 747ba8ac..d4413f0d 100644 --- a/run_once.sh +++ b/run_once.sh @@ -3,6 +3,7 @@ # bash run_once.sh lora_sft Qwen-7B 8 50 # bash run_once.sh lora_sft Qwen-7B 1 50 +# bash run_once.sh lora_sft 9g-8B 8 50 # bash run_once.sh inference Qwen-7B 8 50 run_type="$1" model="$2"