diff --git a/results/lora_sft_Qwen-7B_4_gpu_50_step_20240905070656/log.txt b/results/lora_sft_Qwen-7B_4_gpu_50_step_20240905070656/log.txt
new file mode 100644
index 00000000..e69de29b
diff --git a/results/lora_sft_Qwen-7B_4_gpu_50_step_20240905070656/lora_sft_Qwen-7B_4_gpu_50_step_20240905070656.yaml b/results/lora_sft_Qwen-7B_4_gpu_50_step_20240905070656/lora_sft_Qwen-7B_4_gpu_50_step_20240905070656.yaml
new file mode 100644
index 00000000..410ed726
--- /dev/null
+++ b/results/lora_sft_Qwen-7B_4_gpu_50_step_20240905070656/lora_sft_Qwen-7B_4_gpu_50_step_20240905070656.yaml
@@ -0,0 +1,31 @@
+bf16: true
+cutoff_len: 1024
+dataset: belle_1m
+ddp_timeout: 180000000
+do_train: true
+eval_steps: 500
+eval_strategy: steps
+finetuning_type: lora
+gradient_accumulation_steps: 8
+include_num_input_tokens_seen: true
+include_tokens_per_second: true
+learning_rate: 0.0001
+logging_steps: 3
+lora_target: all
+lr_scheduler_type: cosine
+max_samples: 10000
+max_steps: '50'
+model_name_or_path: ../../models/Qwen-7B
+num_train_epochs: 10.0
+output_dir: ./results/lora_sft_Qwen-7B_4_gpu_50_step_20240905070656
+overwrite_cache: true
+overwrite_output_dir: true
+per_device_eval_batch_size: 2
+per_device_train_batch_size: 2
+plot_loss: true
+preprocessing_num_workers: 16
+save_steps: 500
+stage: sft
+template: qwen
+val_size: 0.1
+warmup_ratio: 0.1
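
The keys in this YAML follow LLaMA-Factory's SFT config schema. Assuming that is the trainer behind these results, the sketch below shows how the batch-size-related fields of a run like this combine; the GPU count of 4 is inferred from the "4_gpu" tag in the run name (it is not stored in the file itself), and PyYAML is assumed for parsing:

import yaml

# Path taken from the diff above.
CFG = ("results/lora_sft_Qwen-7B_4_gpu_50_step_20240905070656/"
       "lora_sft_Qwen-7B_4_gpu_50_step_20240905070656.yaml")

with open(CFG) as f:
    cfg = yaml.safe_load(f)

# Assumption: 4 data-parallel workers, per the "4_gpu" tag in the run name.
num_gpus = 4

# Sequences consumed per optimizer step across all workers.
effective_batch = (cfg["per_device_train_batch_size"]    # 2
                   * cfg["gradient_accumulation_steps"]  # 8
                   * num_gpus)                           # 4
print(effective_batch)  # 2 * 8 * 4 = 64

# Note: max_steps is serialized as the string '50' in this file,
# so coerce it before doing arithmetic with it.
total_sequences = int(cfg["max_steps"]) * effective_batch
print(total_sequences)  # 50 * 64 = 3200 sequences seen over the run

With max_steps set, the 50-step cap takes precedence over num_train_epochs: 10.0, so the run stops after roughly 3,200 training sequences regardless of epoch count.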