diff --git a/examples/train_lora/llama2_lora_sft.yaml b/examples/train_lora/llama2_lora_sft.yaml
index cb834840..0e0e5fef 100644
--- a/examples/train_lora/llama2_lora_sft.yaml
+++ b/examples/train_lora/llama2_lora_sft.yaml
@@ -13,10 +13,9 @@ template: llama2
 cutoff_len: 1024
 max_samples: 100000
 overwrite_cache: true
-preprocessing_num_workers: 16
 
 ### output
-output_dir: ./saves/LLaMA2-7B/lora/train_24_8_13_08_13
+output_dir: ./saves/LLaMA2-7B/lora/train_24_8_13_10_02
 logging_steps: 3
 save_steps: 100
 plot_loss: true
@@ -25,16 +24,15 @@ overwrite_output_dir: true
 
 ### train
 per_device_train_batch_size: 2
 gradient_accumulation_steps: 8
-learning_rate: 1.5e-4
+learning_rate: 5.0e-05
 num_train_epochs: 10
 lr_scheduler_type: cosine
-warmup_ratio: 0.1
 fp16: true
-ddp_timeout: 180000000
-
-### eval
-val_size: 0.1
-per_device_eval_batch_size: 1 # 1 is the max
-eval_strategy: steps
-eval_steps: 500
+max_grad_norm: 1.0
+warmup_steps: 0
+optim: adamw_torch
+report_to: none
+lora_rank: 8
+lora_alpha: 16
+lora_dropout: 0.1
\ No newline at end of file
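
For reference, a minimal sketch of how the `### train` block of llama2_lora_sft.yaml would read after this diff is applied, assuming the surrounding sections are otherwise unchanged (the diff also drops the `### eval` block, `preprocessing_num_workers`, `warmup_ratio`, and `ddp_timeout`, so no held-out evaluation runs during training):

```yaml
### train
per_device_train_batch_size: 2
gradient_accumulation_steps: 8   # 2 x 8 = 16 samples per device per optimizer step
learning_rate: 5.0e-05
num_train_epochs: 10
lr_scheduler_type: cosine
fp16: true
max_grad_norm: 1.0
warmup_steps: 0
optim: adamw_torch
report_to: none
lora_rank: 8
lora_alpha: 16
lora_dropout: 0.1
```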