diff --git a/results/lora_sft_template.yaml b/results/lora_sft_template.yaml
index 9a4411e4..710f12ac 100644
--- a/results/lora_sft_template.yaml
+++ b/results/lora_sft_template.yaml
@@ -25,7 +25,7 @@ overwrite_output_dir: true
 ### train
 per_device_train_batch_size: 2
 gradient_accumulation_steps: 8
-learning_rate: 1.0e-4
+learning_rate: 2.0e-5
 num_train_epochs: 10.0
 lr_scheduler_type: cosine
 warmup_ratio: 0.1