diff --git a/examples/train_lora/llama2_lora_sft.yaml b/examples/train_lora/llama2_lora_sft.yaml
index 959347b0..792fcb78 100644
--- a/examples/train_lora/llama2_lora_sft.yaml
+++ b/examples/train_lora/llama2_lora_sft.yaml
@@ -17,7 +17,7 @@ preprocessing_num_workers: 16
 
 ### output
 output_dir: ./saves/LLaMA2-7B/lora/train_24_8_12_16_46
-logging_steps: 5
+logging_steps: 3
 save_steps: 100
 plot_loss: true
 overwrite_output_dir: true
@@ -26,7 +26,7 @@ overwrite_output_dir: true
 per_device_train_batch_size: 2
 gradient_accumulation_steps: 8
 learning_rate: 5.0e-5
-num_train_epochs: 5.0
+num_train_epochs: 10
 lr_scheduler_type: cosine
 warmup_ratio: 0.1
 fp16: true
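
Not part of the diff: a quick sanity check of what the new `num_train_epochs: 10` implies in optimizer steps, given the unchanged batch settings in this config. This is a minimal sketch assuming a single-GPU run and a hypothetical dataset size; only `per_device_train_batch_size`, `gradient_accumulation_steps`, and `num_train_epochs` come from the file above.

```python
# Sketch only: estimate optimizer steps under the updated config.
# Values marked "hypothetical" are assumptions, not taken from the diff.
per_device_train_batch_size = 2    # from config
gradient_accumulation_steps = 8    # from config
num_train_epochs = 10              # value after this change (was 5.0)
num_devices = 1                    # assumption: single-GPU run
dataset_size = 1_000               # hypothetical; depends on the dataset used

effective_batch_size = per_device_train_batch_size * gradient_accumulation_steps * num_devices
steps_per_epoch = dataset_size // effective_batch_size
total_steps = steps_per_epoch * num_train_epochs

print(effective_batch_size)  # 16
print(steps_per_epoch)       # 62
print(total_steps)           # 620, so logging_steps: 3 and save_steps: 100 both trigger many times
```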