diff --git a/results/lora_sft_Baichuan-7B_4_gpu_50_step_20240905050501/lora_sft_Baichuan-7B_4_gpu_50_step_20240905050501.yml b/results/lora_sft_Baichuan-7B_4_gpu_50_step_20240905050501/lora_sft_Baichuan-7B_4_gpu_50_step_20240905050501.yml
new file mode 100644
index 00000000..26507813
--- /dev/null
+++ b/results/lora_sft_Baichuan-7B_4_gpu_50_step_20240905050501/lora_sft_Baichuan-7B_4_gpu_50_step_20240905050501.yml
@@ -0,0 +1,31 @@
+bf16: true
+cutoff_len: 1024
+dataset: belle_1m
+ddp_timeout: 180000000
+do_train: true
+eval_steps: 500
+eval_strategy: steps
+finetuning_type: lora
+gradient_accumulation_steps: 8
+include_num_input_tokens_seen: true
+include_tokens_per_second: true
+learning_rate: 0.0001
+logging_steps: 3
+lora_target: all
+lr_scheduler_type: cosine
+max_samples: 10000
+max_steps: '50'
+model_name_or_path: ''
+num_train_epochs: 10.0
+output_dir: ./results/lora_sft_Baichuan-7B_4_gpu_50_step_20240905050501
+overwrite_cache: true
+overwrite_output_dir: true
+per_device_eval_batch_size: 2
+per_device_train_batch_size: 2
+plot_loss: true
+preprocessing_num_workers: 16
+save_steps: 500
+stage: sft
+template: ''
+val_size: 0.1
+warmup_ratio: 0.1
diff --git a/results/lora_sft_Baichuan-7B_4_gpu_50_step_20240905050958/lora_sft_Baichuan-7B_4_gpu_50_step_20240905050958.yaml b/results/lora_sft_Baichuan-7B_4_gpu_50_step_20240905050958/lora_sft_Baichuan-7B_4_gpu_50_step_20240905050958.yaml
new file mode 100644
index 00000000..e041b60b
--- /dev/null
+++ b/results/lora_sft_Baichuan-7B_4_gpu_50_step_20240905050958/lora_sft_Baichuan-7B_4_gpu_50_step_20240905050958.yaml
@@ -0,0 +1,31 @@
+bf16: true
+cutoff_len: 1024
+dataset: belle_1m
+ddp_timeout: 180000000
+do_train: true
+eval_steps: 500
+eval_strategy: steps
+finetuning_type: lora
+gradient_accumulation_steps: 8
+include_num_input_tokens_seen: true
+include_tokens_per_second: true
+learning_rate: 0.0001
+logging_steps: 3
+lora_target: all
+lr_scheduler_type: cosine
+max_samples: 10000
+max_steps: '50'
+model_name_or_path: ''
+num_train_epochs: 10.0
+output_dir: ./results/lora_sft_Baichuan-7B_4_gpu_50_step_20240905050958
+overwrite_cache: true
+overwrite_output_dir: true
+per_device_eval_batch_size: 2
+per_device_train_batch_size: 2
+plot_loss: true
+preprocessing_num_workers: 16
+save_steps: 500
+stage: sft
+template: ''
+val_size: 0.1
+warmup_ratio: 0.1
diff --git a/results/lora_sft_Baichuan-7B_4_gpu_50_step_20240905051039/lora_sft_Baichuan-7B_4_gpu_50_step_20240905051039.yaml b/results/lora_sft_Baichuan-7B_4_gpu_50_step_20240905051039/lora_sft_Baichuan-7B_4_gpu_50_step_20240905051039.yaml
new file mode 100644
index 00000000..84e13b18
--- /dev/null
+++ b/results/lora_sft_Baichuan-7B_4_gpu_50_step_20240905051039/lora_sft_Baichuan-7B_4_gpu_50_step_20240905051039.yaml
@@ -0,0 +1,31 @@
+bf16: true
+cutoff_len: 1024
+dataset: belle_1m
+ddp_timeout: 180000000
+do_train: true
+eval_steps: 500
+eval_strategy: steps
+finetuning_type: lora
+gradient_accumulation_steps: 8
+include_num_input_tokens_seen: true
+include_tokens_per_second: true
+learning_rate: 0.0001
+logging_steps: 3
+lora_target: all
+lr_scheduler_type: cosine
+max_samples: 10000
+max_steps: '50'
+model_name_or_path: ''
+num_train_epochs: 10.0
+output_dir: ./results/lora_sft_Baichuan-7B_4_gpu_50_step_20240905051039
+overwrite_cache: true
+overwrite_output_dir: true
+per_device_eval_batch_size: 2
+per_device_train_batch_size: 2
+plot_loss: true
+preprocessing_num_workers: 16
+save_steps: 500
+stage: sft
+template: ''
+val_size: 0.1
+warmup_ratio: 0.1
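Review note: all three Baichuan-7B configs above were committed with an empty `model_name_or_path` and `template`, which would prevent the trainer from resolving a model, and with `max_steps` as the quoted string `'50'` rather than an integer. A minimal pre-flight check that would catch both before launch (a sketch, assuming PyYAML is available; the field names come straight from the configs above):

```python
import sys
import yaml

def check_config(path: str) -> list[str]:
    """Flag config values that would break a run before launching it."""
    with open(path) as f:
        cfg = yaml.safe_load(f)
    problems = []
    # Empty model path / template, as seen in the Baichuan-7B configs above.
    for key in ("model_name_or_path", "template"):
        if not cfg.get(key):
            problems.append(f"{key} is empty")
    # YAML quoting makes max_steps: '50' load as the string "50".
    if not isinstance(cfg.get("max_steps"), int):
        problems.append(f"max_steps should be an int, got {cfg.get('max_steps')!r}")
    return problems

if __name__ == "__main__":
    for issue in check_config(sys.argv[1]):
        print("WARN:", issue)
```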
diff --git a/results/lora_sft_Qwen-7B_4_gpu_50_step_20240905052241/lora_sft_Qwen-7B_4_gpu_50_step_20240905052241.yaml b/results/lora_sft_Qwen-7B_4_gpu_50_step_20240905052241/lora_sft_Qwen-7B_4_gpu_50_step_20240905052241.yaml
new file mode 100644
index 00000000..2a1de0fe
--- /dev/null
+++ b/results/lora_sft_Qwen-7B_4_gpu_50_step_20240905052241/lora_sft_Qwen-7B_4_gpu_50_step_20240905052241.yaml
@@ -0,0 +1,31 @@
+bf16: true
+cutoff_len: 1024
+dataset: belle_1m
+ddp_timeout: 180000000
+do_train: true
+eval_steps: 500
+eval_strategy: steps
+finetuning_type: lora
+gradient_accumulation_steps: 8
+include_num_input_tokens_seen: true
+include_tokens_per_second: true
+learning_rate: 0.0001
+logging_steps: 3
+lora_target: all
+lr_scheduler_type: cosine
+max_samples: 10000
+max_steps: '50'
+model_name_or_path: ../../models/Qwen-7B
+num_train_epochs: 10.0
+output_dir: ./results/lora_sft_Qwen-7B_4_gpu_50_step_20240905052241
+overwrite_cache: true
+overwrite_output_dir: true
+per_device_eval_batch_size: 2
+per_device_train_batch_size: 2
+plot_loss: true
+preprocessing_num_workers: 16
+save_steps: 500
+stage: sft
+template: qwen
+val_size: 0.1
+warmup_ratio: 0.1
diff --git a/results/lora_sft_Qwen-7B_4_gpu_50_step_20240905053758/lora_sft_Qwen-7B_4_gpu_50_step_20240905053758.yaml b/results/lora_sft_Qwen-7B_4_gpu_50_step_20240905053758/lora_sft_Qwen-7B_4_gpu_50_step_20240905053758.yaml
new file mode 100644
index 00000000..caa1505f
--- /dev/null
+++ b/results/lora_sft_Qwen-7B_4_gpu_50_step_20240905053758/lora_sft_Qwen-7B_4_gpu_50_step_20240905053758.yaml
@@ -0,0 +1,31 @@
+bf16: true
+cutoff_len: 1024
+dataset: belle_1m
+ddp_timeout: 180000000
+do_train: true
+eval_steps: 500
+eval_strategy: steps
+finetuning_type: lora
+gradient_accumulation_steps: 8
+include_num_input_tokens_seen: true
+include_tokens_per_second: true
+learning_rate: 0.0001
+logging_steps: 3
+lora_target: all
+lr_scheduler_type: cosine
+max_samples: 10000
+max_steps: '50'
+model_name_or_path: ../../models/Qwen-7B
+num_train_epochs: 10.0
+output_dir: ./results/lora_sft_Qwen-7B_4_gpu_50_step_20240905053758
+overwrite_cache: true
+overwrite_output_dir: true
+per_device_eval_batch_size: 2
+per_device_train_batch_size: 2
+plot_loss: true
+preprocessing_num_workers: 16
+save_steps: 500
+stage: sft
+template: qwen
+val_size: 0.1
+warmup_ratio: 0.1
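Review note: the two Qwen-7B configs above are the same recipe with the model path and template filled in. The throughput-relevant numbers they imply, worked out below (the "4_gpu" tag in the run names is an assumption about world size; nothing in the YAML itself states it):

```python
# Global batch size implied by the configs above, assuming the
# "4_gpu" tag in the run names means 4 data-parallel workers.
per_device_train_batch_size = 2   # from the config
gradient_accumulation_steps = 8   # from the config
num_gpus = 4                      # assumption, taken from the run name

global_batch = per_device_train_batch_size * gradient_accumulation_steps * num_gpus
print(global_batch)  # 64 sequences per optimizer step

# max_steps = 50 caps the run at 50 * 64 = 3200 samples, well under one
# pass over the ~9000 training samples left after max_samples: 10000
# and val_size: 0.1, so num_train_epochs: 10.0 never takes effect.
```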
diff --git "a/results/lora_sft_Qwen-7B_4_gpu_50_step_20240905064243/log.txt &\n train_pid=1720\n echo Start" "b/results/lora_sft_Qwen-7B_4_gpu_50_step_20240905064243/log.txt &\n train_pid=1720\n echo Start"
new file mode 100644
index 00000000..e69de29b
diff --git a/results/lora_sft_Qwen-7B_4_gpu_50_step_20240905064243/lora_sft_Qwen-7B_4_gpu_50_step_20240905064243.yaml b/results/lora_sft_Qwen-7B_4_gpu_50_step_20240905064243/lora_sft_Qwen-7B_4_gpu_50_step_20240905064243.yaml
new file mode 100644
index 00000000..4631b614
--- /dev/null
+++ b/results/lora_sft_Qwen-7B_4_gpu_50_step_20240905064243/lora_sft_Qwen-7B_4_gpu_50_step_20240905064243.yaml
@@ -0,0 +1,31 @@
+bf16: true
+cutoff_len: 1024
+dataset: belle_1m
+ddp_timeout: 180000000
+do_train: true
+eval_steps: 500
+eval_strategy: steps
+finetuning_type: lora
+gradient_accumulation_steps: 8
+include_num_input_tokens_seen: true
+include_tokens_per_second: true
+learning_rate: 0.0001
+logging_steps: 3
+lora_target: all
+lr_scheduler_type: cosine
+max_samples: 10000
+max_steps: '50'
+model_name_or_path: ../../models/Qwen-7B
+num_train_epochs: 10.0
+output_dir: ./results/lora_sft_Qwen-7B_4_gpu_50_step_20240905064243
+overwrite_cache: true
+overwrite_output_dir: true
+per_device_eval_batch_size: 2
+per_device_train_batch_size: 2
+plot_loss: true
+preprocessing_num_workers: 16
+save_steps: 500
+stage: sft
+template: qwen
+val_size: 0.1
+warmup_ratio: 0.1
diff --git a/results/lora_sft_Qwen-7B_4_gpu_50_step_20240905064736/log.txt b/results/lora_sft_Qwen-7B_4_gpu_50_step_20240905064736/log.txt
new file mode 100644
index 00000000..e69de29b
diff --git a/results/lora_sft_Qwen-7B_4_gpu_50_step_20240905064736/lora_sft_Qwen-7B_4_gpu_50_step_20240905064736.yaml b/results/lora_sft_Qwen-7B_4_gpu_50_step_20240905064736/lora_sft_Qwen-7B_4_gpu_50_step_20240905064736.yaml
new file mode 100644
index 00000000..fe61b1e4
--- /dev/null
+++ b/results/lora_sft_Qwen-7B_4_gpu_50_step_20240905064736/lora_sft_Qwen-7B_4_gpu_50_step_20240905064736.yaml
@@ -0,0 +1,31 @@
+bf16: true
+cutoff_len: 1024
+dataset: belle_1m
+ddp_timeout: 180000000
+do_train: true
+eval_steps: 500
+eval_strategy: steps
+finetuning_type: lora
+gradient_accumulation_steps: 8
+include_num_input_tokens_seen: true
+include_tokens_per_second: true
+learning_rate: 0.0001
+logging_steps: 3
+lora_target: all
+lr_scheduler_type: cosine
+max_samples: 10000
+max_steps: '50'
+model_name_or_path: ../../models/Qwen-7B
+num_train_epochs: 10.0
+output_dir: ./results/lora_sft_Qwen-7B_4_gpu_50_step_20240905064736
+overwrite_cache: true
+overwrite_output_dir: true
+per_device_eval_batch_size: 2
+per_device_train_batch_size: 2
+plot_loss: true
+preprocessing_num_workers: 16
+save_steps: 500
+stage: sft
+template: qwen
+val_size: 0.1
+warmup_ratio: 0.1
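Review note: the quoted path in the 064243 entry ("log.txt &\n train_pid=1720\n echo Start") is an empty file whose literal name contains the tail of a launch script; the log redirect, the backgrounding `&`, and the next two script lines were evidently captured into a single shell word. The 064736 run produced a plain `log.txt`, so the launcher was presumably fixed in between. One way to avoid this class of bug entirely is to skip the shell (a sketch, not the original launcher, which is not part of this diff; `llamafactory-cli train <config>` is assumed as the entry point because the YAML schema above matches LLaMA-Factory's):

```python
import subprocess
from pathlib import Path

def launch(config: Path, log: Path) -> subprocess.Popen:
    """Start a training run with its output going to a real log file."""
    log.parent.mkdir(parents=True, exist_ok=True)
    with open(log, "w") as f:
        # An argv list plus an explicit file handle: no shell string is
        # ever built, so there is nothing to mis-quote. The child keeps
        # a duplicate of the file descriptor after the `with` closes ours.
        return subprocess.Popen(
            ["llamafactory-cli", "train", str(config)],
            stdout=f,
            stderr=subprocess.STDOUT,
        )

run = Path("results/lora_sft_Qwen-7B_4_gpu_50_step_20240905064736")
proc = launch(run / f"{run.name}.yaml", run / "log.txt")
print("train_pid =", proc.pid)
```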