From beb97a099c57121edaf1eae28bb949c8b996ec14 Mon Sep 17 00:00:00 2001
From: wql
Date: Tue, 13 Aug 2024 13:19:42 +0800
Subject: [PATCH] train: change yaml

---
 examples/train_lora/llama2_lora_sft.yaml | 27 ++++++++++++------------
 1 file changed, 14 insertions(+), 13 deletions(-)

diff --git a/examples/train_lora/llama2_lora_sft.yaml b/examples/train_lora/llama2_lora_sft.yaml
index 0e0e5fef..95884399 100644
--- a/examples/train_lora/llama2_lora_sft.yaml
+++ b/examples/train_lora/llama2_lora_sft.yaml
@@ -5,17 +5,18 @@ model_name_or_path: modelscope/Llama-2-7b-ms
 stage: sft
 do_train: true
 finetuning_type: lora
-lora_target: q_proj,v_proj
+lora_target: all
 
 ### dataset
 dataset: alpaca_en
 template: llama2
 cutoff_len: 1024
-max_samples: 100000
+max_samples: 10000
 overwrite_cache: true
+preprocessing_num_workers: 16
 
 ### output
-output_dir: ./saves/LLaMA2-7B/lora/train_24_8_13_10_02
+output_dir: ./saves/LLaMA2-7B/lora/train_24_8_13_13_16
 logging_steps: 3
 save_steps: 100
 plot_loss: true
@@ -24,15 +25,15 @@ overwrite_output_dir: true
 ### train
 per_device_train_batch_size: 2
 gradient_accumulation_steps: 8
-learning_rate: 5.0e-05
-num_train_epochs: 10
+learning_rate: 1.0e-4
+num_train_epochs: 10.0
 lr_scheduler_type: cosine
-fp16: true
+warmup_ratio: 0.1
+bf16: true
+ddp_timeout: 180000000
 
-max_grad_norm: 1.0
-warmup_steps: 0
-optim: adamw_torch
-report_to: none
-lora_rank: 8
-lora_alpha: 16
-lora_dropout: 0.1
\ No newline at end of file
+### eval
+val_size: 0.1
+per_device_eval_batch_size: 2
+eval_strategy: steps
+eval_steps: 500
\ No newline at end of file
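
A minimal way to exercise the updated config, assuming the standard LLaMA-Factory CLI entry point (llamafactory-cli) is available in the environment:

    llamafactory-cli train examples/train_lora/llama2_lora_sft.yaml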