fix: remove unneeded test files
This commit is contained in:
parent
ab4bf8bd4d
commit
c6a4d43c06
@@ -1,31 +0,0 @@
-bf16: true
-cutoff_len: 1024
-dataset: belle_1m
-ddp_timeout: 180000000
-do_train: true
-eval_steps: 500
-eval_strategy: steps
-finetuning_type: lora
-gradient_accumulation_steps: 8
-include_num_input_tokens_seen: true
-include_tokens_per_second: true
-learning_rate: 0.0001
-logging_steps: 3
-lora_target: all
-lr_scheduler_type: cosine
-max_samples: 10000
-max_steps: '50'
-model_name_or_path: ''
-num_train_epochs: 10.0
-output_dir: ./results/lora_sft_Baichuan-7B_4_gpu_50_step_20240905050501
-overwrite_cache: true
-overwrite_output_dir: true
-per_device_eval_batch_size: 2
-per_device_train_batch_size: 2
-plot_loss: true
-preprocessing_num_workers: 16
-save_steps: 500
-stage: sft
-template: ''
-val_size: 0.1
-warmup_ratio: 0.1
@@ -1,31 +0,0 @@
-bf16: true
-cutoff_len: 1024
-dataset: belle_1m
-ddp_timeout: 180000000
-do_train: true
-eval_steps: 500
-eval_strategy: steps
-finetuning_type: lora
-gradient_accumulation_steps: 8
-include_num_input_tokens_seen: true
-include_tokens_per_second: true
-learning_rate: 0.0001
-logging_steps: 3
-lora_target: all
-lr_scheduler_type: cosine
-max_samples: 10000
-max_steps: '50'
-model_name_or_path: ''
-num_train_epochs: 10.0
-output_dir: ./results/lora_sft_Baichuan-7B_4_gpu_50_step_20240905050958
-overwrite_cache: true
-overwrite_output_dir: true
-per_device_eval_batch_size: 2
-per_device_train_batch_size: 2
-plot_loss: true
-preprocessing_num_workers: 16
-save_steps: 500
-stage: sft
-template: ''
-val_size: 0.1
-warmup_ratio: 0.1
@@ -1,31 +0,0 @@
-bf16: true
-cutoff_len: 1024
-dataset: belle_1m
-ddp_timeout: 180000000
-do_train: true
-eval_steps: 500
-eval_strategy: steps
-finetuning_type: lora
-gradient_accumulation_steps: 8
-include_num_input_tokens_seen: true
-include_tokens_per_second: true
-learning_rate: 0.0001
-logging_steps: 3
-lora_target: all
-lr_scheduler_type: cosine
-max_samples: 10000
-max_steps: '50'
-model_name_or_path: ''
-num_train_epochs: 10.0
-output_dir: ./results/lora_sft_Baichuan-7B_4_gpu_50_step_20240905051039
-overwrite_cache: true
-overwrite_output_dir: true
-per_device_eval_batch_size: 2
-per_device_train_batch_size: 2
-plot_loss: true
-preprocessing_num_workers: 16
-save_steps: 500
-stage: sft
-template: ''
-val_size: 0.1
-warmup_ratio: 0.1
@@ -1,31 +0,0 @@
-bf16: true
-cutoff_len: 1024
-dataset: belle_1m
-ddp_timeout: 180000000
-do_train: true
-eval_steps: 500
-eval_strategy: steps
-finetuning_type: lora
-gradient_accumulation_steps: 8
-include_num_input_tokens_seen: true
-include_tokens_per_second: true
-learning_rate: 0.0001
-logging_steps: 3
-lora_target: all
-lr_scheduler_type: cosine
-max_samples: 10000
-max_steps: '50'
-model_name_or_path: ../../models/Qwen-7B
-num_train_epochs: 10.0
-output_dir: ./results/lora_sft_Qwen-7B_4_gpu_50_step_20240905052241
-overwrite_cache: true
-overwrite_output_dir: true
-per_device_eval_batch_size: 2
-per_device_train_batch_size: 2
-plot_loss: true
-preprocessing_num_workers: 16
-save_steps: 500
-stage: sft
-template: qwen
-val_size: 0.1
-warmup_ratio: 0.1
@@ -1,31 +0,0 @@
-bf16: true
-cutoff_len: 1024
-dataset: belle_1m
-ddp_timeout: 180000000
-do_train: true
-eval_steps: 500
-eval_strategy: steps
-finetuning_type: lora
-gradient_accumulation_steps: 8
-include_num_input_tokens_seen: true
-include_tokens_per_second: true
-learning_rate: 0.0001
-logging_steps: 3
-lora_target: all
-lr_scheduler_type: cosine
-max_samples: 10000
-max_steps: '50'
-model_name_or_path: ../../models/Qwen-7B
-num_train_epochs: 10.0
-output_dir: ./results/lora_sft_Qwen-7B_4_gpu_50_step_20240905053758
-overwrite_cache: true
-overwrite_output_dir: true
-per_device_eval_batch_size: 2
-per_device_train_batch_size: 2
-plot_loss: true
-preprocessing_num_workers: 16
-save_steps: 500
-stage: sft
-template: qwen
-val_size: 0.1
-warmup_ratio: 0.1
@@ -1,31 +0,0 @@
-bf16: true
-cutoff_len: 1024
-dataset: belle_1m
-ddp_timeout: 180000000
-do_train: true
-eval_steps: 500
-eval_strategy: steps
-finetuning_type: lora
-gradient_accumulation_steps: 8
-include_num_input_tokens_seen: true
-include_tokens_per_second: true
-learning_rate: 0.0001
-logging_steps: 3
-lora_target: all
-lr_scheduler_type: cosine
-max_samples: 10000
-max_steps: '50'
-model_name_or_path: ../../models/Qwen-7B
-num_train_epochs: 10.0
-output_dir: ./results/lora_sft_Qwen-7B_4_gpu_50_step_20240905064243
-overwrite_cache: true
-overwrite_output_dir: true
-per_device_eval_batch_size: 2
-per_device_train_batch_size: 2
-plot_loss: true
-preprocessing_num_workers: 16
-save_steps: 500
-stage: sft
-template: qwen
-val_size: 0.1
-warmup_ratio: 0.1
@@ -1,31 +0,0 @@
-bf16: true
-cutoff_len: 1024
-dataset: belle_1m
-ddp_timeout: 180000000
-do_train: true
-eval_steps: 500
-eval_strategy: steps
-finetuning_type: lora
-gradient_accumulation_steps: 8
-include_num_input_tokens_seen: true
-include_tokens_per_second: true
-learning_rate: 0.0001
-logging_steps: 3
-lora_target: all
-lr_scheduler_type: cosine
-max_samples: 10000
-max_steps: '50'
-model_name_or_path: ../../models/Qwen-7B
-num_train_epochs: 10.0
-output_dir: ./results/lora_sft_Qwen-7B_4_gpu_50_step_20240905064736
-overwrite_cache: true
-overwrite_output_dir: true
-per_device_eval_batch_size: 2
-per_device_train_batch_size: 2
-plot_loss: true
-preprocessing_num_workers: 16
-save_steps: 500
-stage: sft
-template: qwen
-val_size: 0.1
-warmup_ratio: 0.1
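
For reference, each deleted file was a self-contained LoRA SFT test config. The keys (stage, finetuning_type, lora_target, template, dataset) match the LLaMA-Factory config style, where such a file is typically launched with `llamafactory-cli train <config>.yaml`; that association is an inference here, not stated by the commit. Below is a minimal Python sketch of loading and sanity-checking one of these files; it assumes PyYAML, and the file name "lora_sft_test.yaml" is illustrative, not a real path from the repo.

    # Minimal sketch (not from the commit): load one of the deleted test
    # configs and sanity-check a couple of fields.
    import yaml

    with open("lora_sft_test.yaml") as f:  # illustrative file name
        cfg = yaml.safe_load(f)

    # The deleted files store max_steps as the string '50'; a consumer that
    # expects an int (e.g. HuggingFace TrainingArguments) must coerce it.
    max_steps = int(cfg["max_steps"])

    # Effective batch size per optimizer step, assuming the 4-GPU run
    # implied by the output_dir names:
    # per_device * grad_accum * world_size = 2 * 8 * 4 = 64.
    world_size = 4  # assumption taken from "4_gpu" in the output_dir names
    effective_batch = (
        cfg["per_device_train_batch_size"]
        * cfg["gradient_accumulation_steps"]
        * world_size
    )
    print(f"max_steps={max_steps}, effective_batch={effective_batch}")

Note that the three Baichuan-7B configs leave model_name_or_path and template empty, while the four Qwen-7B configs set them to ../../models/Qwen-7B and qwen respectively; only output_dir timestamps otherwise distinguish the files.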