fix: remove unneeded test files
parent ab4bf8bd4d
commit c6a4d43c06
@@ -1,31 +0,0 @@
-bf16: true
-cutoff_len: 1024
-dataset: belle_1m
-ddp_timeout: 180000000
-do_train: true
-eval_steps: 500
-eval_strategy: steps
-finetuning_type: lora
-gradient_accumulation_steps: 8
-include_num_input_tokens_seen: true
-include_tokens_per_second: true
-learning_rate: 0.0001
-logging_steps: 3
-lora_target: all
-lr_scheduler_type: cosine
-max_samples: 10000
-max_steps: '50'
-model_name_or_path: ''
-num_train_epochs: 10.0
-output_dir: ./results/lora_sft_Baichuan-7B_4_gpu_50_step_20240905050501
-overwrite_cache: true
-overwrite_output_dir: true
-per_device_eval_batch_size: 2
-per_device_train_batch_size: 2
-plot_loss: true
-preprocessing_num_workers: 16
-save_steps: 500
-stage: sft
-template: ''
-val_size: 0.1
-warmup_ratio: 0.1
@@ -1,31 +0,0 @@
-bf16: true
-cutoff_len: 1024
-dataset: belle_1m
-ddp_timeout: 180000000
-do_train: true
-eval_steps: 500
-eval_strategy: steps
-finetuning_type: lora
-gradient_accumulation_steps: 8
-include_num_input_tokens_seen: true
-include_tokens_per_second: true
-learning_rate: 0.0001
-logging_steps: 3
-lora_target: all
-lr_scheduler_type: cosine
-max_samples: 10000
-max_steps: '50'
-model_name_or_path: ''
-num_train_epochs: 10.0
-output_dir: ./results/lora_sft_Baichuan-7B_4_gpu_50_step_20240905050958
-overwrite_cache: true
-overwrite_output_dir: true
-per_device_eval_batch_size: 2
-per_device_train_batch_size: 2
-plot_loss: true
-preprocessing_num_workers: 16
-save_steps: 500
-stage: sft
-template: ''
-val_size: 0.1
-warmup_ratio: 0.1
@@ -1,31 +0,0 @@
-bf16: true
-cutoff_len: 1024
-dataset: belle_1m
-ddp_timeout: 180000000
-do_train: true
-eval_steps: 500
-eval_strategy: steps
-finetuning_type: lora
-gradient_accumulation_steps: 8
-include_num_input_tokens_seen: true
-include_tokens_per_second: true
-learning_rate: 0.0001
-logging_steps: 3
-lora_target: all
-lr_scheduler_type: cosine
-max_samples: 10000
-max_steps: '50'
-model_name_or_path: ''
-num_train_epochs: 10.0
-output_dir: ./results/lora_sft_Baichuan-7B_4_gpu_50_step_20240905051039
-overwrite_cache: true
-overwrite_output_dir: true
-per_device_eval_batch_size: 2
-per_device_train_batch_size: 2
-plot_loss: true
-preprocessing_num_workers: 16
-save_steps: 500
-stage: sft
-template: ''
-val_size: 0.1
-warmup_ratio: 0.1
@@ -1,31 +0,0 @@
-bf16: true
-cutoff_len: 1024
-dataset: belle_1m
-ddp_timeout: 180000000
-do_train: true
-eval_steps: 500
-eval_strategy: steps
-finetuning_type: lora
-gradient_accumulation_steps: 8
-include_num_input_tokens_seen: true
-include_tokens_per_second: true
-learning_rate: 0.0001
-logging_steps: 3
-lora_target: all
-lr_scheduler_type: cosine
-max_samples: 10000
-max_steps: '50'
-model_name_or_path: ../../models/Qwen-7B
-num_train_epochs: 10.0
-output_dir: ./results/lora_sft_Qwen-7B_4_gpu_50_step_20240905052241
-overwrite_cache: true
-overwrite_output_dir: true
-per_device_eval_batch_size: 2
-per_device_train_batch_size: 2
-plot_loss: true
-preprocessing_num_workers: 16
-save_steps: 500
-stage: sft
-template: qwen
-val_size: 0.1
-warmup_ratio: 0.1
@@ -1,31 +0,0 @@
-bf16: true
-cutoff_len: 1024
-dataset: belle_1m
-ddp_timeout: 180000000
-do_train: true
-eval_steps: 500
-eval_strategy: steps
-finetuning_type: lora
-gradient_accumulation_steps: 8
-include_num_input_tokens_seen: true
-include_tokens_per_second: true
-learning_rate: 0.0001
-logging_steps: 3
-lora_target: all
-lr_scheduler_type: cosine
-max_samples: 10000
-max_steps: '50'
-model_name_or_path: ../../models/Qwen-7B
-num_train_epochs: 10.0
-output_dir: ./results/lora_sft_Qwen-7B_4_gpu_50_step_20240905053758
-overwrite_cache: true
-overwrite_output_dir: true
-per_device_eval_batch_size: 2
-per_device_train_batch_size: 2
-plot_loss: true
-preprocessing_num_workers: 16
-save_steps: 500
-stage: sft
-template: qwen
-val_size: 0.1
-warmup_ratio: 0.1
@@ -1,31 +0,0 @@
-bf16: true
-cutoff_len: 1024
-dataset: belle_1m
-ddp_timeout: 180000000
-do_train: true
-eval_steps: 500
-eval_strategy: steps
-finetuning_type: lora
-gradient_accumulation_steps: 8
-include_num_input_tokens_seen: true
-include_tokens_per_second: true
-learning_rate: 0.0001
-logging_steps: 3
-lora_target: all
-lr_scheduler_type: cosine
-max_samples: 10000
-max_steps: '50'
-model_name_or_path: ../../models/Qwen-7B
-num_train_epochs: 10.0
-output_dir: ./results/lora_sft_Qwen-7B_4_gpu_50_step_20240905064243
-overwrite_cache: true
-overwrite_output_dir: true
-per_device_eval_batch_size: 2
-per_device_train_batch_size: 2
-plot_loss: true
-preprocessing_num_workers: 16
-save_steps: 500
-stage: sft
-template: qwen
-val_size: 0.1
-warmup_ratio: 0.1
@@ -1,31 +0,0 @@
-bf16: true
-cutoff_len: 1024
-dataset: belle_1m
-ddp_timeout: 180000000
-do_train: true
-eval_steps: 500
-eval_strategy: steps
-finetuning_type: lora
-gradient_accumulation_steps: 8
-include_num_input_tokens_seen: true
-include_tokens_per_second: true
-learning_rate: 0.0001
-logging_steps: 3
-lora_target: all
-lr_scheduler_type: cosine
-max_samples: 10000
-max_steps: '50'
-model_name_or_path: ../../models/Qwen-7B
-num_train_epochs: 10.0
-output_dir: ./results/lora_sft_Qwen-7B_4_gpu_50_step_20240905064736
-overwrite_cache: true
-overwrite_output_dir: true
-per_device_eval_batch_size: 2
-per_device_train_batch_size: 2
-plot_loss: true
-preprocessing_num_workers: 16
-save_steps: 500
-stage: sft
-template: qwen
-val_size: 0.1
-warmup_ratio: 0.1
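All seven deleted files are timestamped, auto-generated LoRA SFT test configs. Their field names look like LLaMA-Factory training arguments, so each would presumably have been run with something like `llamafactory-cli train <config>.yaml` (an assumption; the launcher is not shown in this commit). Two quirks in the generated files are visible above: `max_steps` is serialized as the string `'50'` rather than an integer, and the three Baichuan-7B configs leave `model_name_or_path` and `template` empty. Below is a minimal pre-flight-check sketch for configs like these, assuming PyYAML is available and treating those fields as required; the field names come from the configs themselves, everything else is illustrative.

```python
# Sanity-check generated training configs like the ones deleted above.
# Assumes PyYAML (pip install pyyaml); field names taken from the configs.
import sys
import yaml

def check_config(path: str) -> list[str]:
    """Return a list of problems found in one training config."""
    with open(path) as f:
        cfg = yaml.safe_load(f)
    problems = []
    # max_steps should be an integer; the deleted configs carry the string '50'.
    if not isinstance(cfg.get("max_steps"), int):
        problems.append(f"max_steps is {cfg.get('max_steps')!r}, expected int")
    # An empty model path or chat template makes the run unlaunchable.
    for key in ("model_name_or_path", "template"):
        if not cfg.get(key):
            problems.append(f"{key} is empty")
    return problems

if __name__ == "__main__":
    for path in sys.argv[1:]:
        for problem in check_config(path):
            print(f"{path}: {problem}")
```

Invoked as, say, `python check_config.py results/*.yaml` (hypothetical filename), it would flag the string-typed `max_steps` in every file above and the blank `model_name_or_path`/`template` in the three Baichuan-7B configs.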