add: add all test results

wql 2024-09-05 06:52:33 +00:00
parent f71f62f2f6
commit ab4bf8bd4d
9 changed files with 217 additions and 0 deletions

@@ -0,0 +1,31 @@
bf16: true
cutoff_len: 1024
dataset: belle_1m
ddp_timeout: 180000000
do_train: true
eval_steps: 500
eval_strategy: steps
finetuning_type: lora
gradient_accumulation_steps: 8
include_num_input_tokens_seen: true
include_tokens_per_second: true
learning_rate: 0.0001
logging_steps: 3
lora_target: all
lr_scheduler_type: cosine
max_samples: 10000
max_steps: '50'
model_name_or_path: ''
num_train_epochs: 10.0
output_dir: ./results/lora_sft_Baichuan-7B_4_gpu_50_step_20240905050501
overwrite_cache: true
overwrite_output_dir: true
per_device_eval_batch_size: 2
per_device_train_batch_size: 2
plot_loss: true
preprocessing_num_workers: 16
save_steps: 500
stage: sft
template: ''
val_size: 0.1
warmup_ratio: 0.1
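
The configs in this commit share this recipe verbatim; mostly the timestamp in output_dir changes. A quick sanity check of the batch math, as a minimal Python sketch: the 4 data-parallel workers are an assumption read off the "4_gpu" tag in output_dir, not a field in the YAML.

# Effective batch size for the config above. num_gpus is inferred from
# "4_gpu" in output_dir (an assumption); the rest is copied from the YAML.
per_device_train_batch_size = 2
gradient_accumulation_steps = 8
num_gpus = 4  # assumption from the run name

effective_batch = per_device_train_batch_size * gradient_accumulation_steps * num_gpus
print(effective_batch)       # 2 * 8 * 4 = 64 samples per optimizer step

# max_steps (50) takes precedence over num_train_epochs (10.0) when set,
# so the run sees at most 64 * 50 = 3200 samples, well under max_samples (10000).
print(effective_batch * 50)  # 3200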

@@ -0,0 +1,31 @@
bf16: true
cutoff_len: 1024
dataset: belle_1m
ddp_timeout: 180000000
do_train: true
eval_steps: 500
eval_strategy: steps
finetuning_type: lora
gradient_accumulation_steps: 8
include_num_input_tokens_seen: true
include_tokens_per_second: true
learning_rate: 0.0001
logging_steps: 3
lora_target: all
lr_scheduler_type: cosine
max_samples: 10000
max_steps: '50'
model_name_or_path: ''
num_train_epochs: 10.0
output_dir: ./results/lora_sft_Baichuan-7B_4_gpu_50_step_20240905050958
overwrite_cache: true
overwrite_output_dir: true
per_device_eval_batch_size: 2
per_device_train_batch_size: 2
plot_loss: true
preprocessing_num_workers: 16
save_steps: 500
stage: sft
template: ''
val_size: 0.1
warmup_ratio: 0.1
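
This file is identical to the previous one except for the timestamp in output_dir. When a commit batches many near-identical test configs like this, a small sketch along these lines surfaces the actual deltas; the filenames are hypothetical, since the commit view does not show them.

import yaml

a = yaml.safe_load(open("run_20240905050501.yaml"))  # hypothetical names
b = yaml.safe_load(open("run_20240905050958.yaml"))
for key in sorted(a.keys() | b.keys()):
    if a.get(key) != b.get(key):
        print(f"{key}: {a.get(key)!r} -> {b.get(key)!r}")
# Expected output for these two files: only output_dir differs.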

@@ -0,0 +1,31 @@
bf16: true
cutoff_len: 1024
dataset: belle_1m
ddp_timeout: 180000000
do_train: true
eval_steps: 500
eval_strategy: steps
finetuning_type: lora
gradient_accumulation_steps: 8
include_num_input_tokens_seen: true
include_tokens_per_second: true
learning_rate: 0.0001
logging_steps: 3
lora_target: all
lr_scheduler_type: cosine
max_samples: 10000
max_steps: '50'
model_name_or_path: ''
num_train_epochs: 10.0
output_dir: ./results/lora_sft_Baichuan-7B_4_gpu_50_step_20240905051039
overwrite_cache: true
overwrite_output_dir: true
per_device_eval_batch_size: 2
per_device_train_batch_size: 2
plot_loss: true
preprocessing_num_workers: 16
save_steps: 500
stage: sft
template: ''
val_size: 0.1
warmup_ratio: 0.1

@@ -0,0 +1,31 @@
bf16: true
cutoff_len: 1024
dataset: belle_1m
ddp_timeout: 180000000
do_train: true
eval_steps: 500
eval_strategy: steps
finetuning_type: lora
gradient_accumulation_steps: 8
include_num_input_tokens_seen: true
include_tokens_per_second: true
learning_rate: 0.0001
logging_steps: 3
lora_target: all
lr_scheduler_type: cosine
max_samples: 10000
max_steps: '50'
model_name_or_path: ../../models/Qwen-7B
num_train_epochs: 10.0
output_dir: ./results/lora_sft_Qwen-7B_4_gpu_50_step_20240905052241
overwrite_cache: true
overwrite_output_dir: true
per_device_eval_batch_size: 2
per_device_train_batch_size: 2
plot_loss: true
preprocessing_num_workers: 16
save_steps: 500
stage: sft
template: qwen
val_size: 0.1
warmup_ratio: 0.1
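
The key set here (stage, finetuning_type, lora_target, template) matches LLaMA-Factory's training YAML, and a file like this is normally passed to llamafactory-cli train. Note that max_steps is stored as the string '50'; HfArgumentParser coerces it, but a load-time check like the sketch below (hypothetical filename) catches type drift and empty fields early.

import yaml

with open("lora_sft_Qwen-7B.yaml") as f:  # hypothetical filename
    cfg = yaml.safe_load(f)

assert cfg["stage"] == "sft" and cfg["finetuning_type"] == "lora"
# The Baichuan files above leave model_name_or_path blank; guard against that.
assert cfg["model_name_or_path"], "model path must not be empty"
print(int(cfg["max_steps"]), cfg["output_dir"])  # coerce the quoted '50'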

@@ -0,0 +1,31 @@
bf16: true
cutoff_len: 1024
dataset: belle_1m
ddp_timeout: 180000000
do_train: true
eval_steps: 500
eval_strategy: steps
finetuning_type: lora
gradient_accumulation_steps: 8
include_num_input_tokens_seen: true
include_tokens_per_second: true
learning_rate: 0.0001
logging_steps: 3
lora_target: all
lr_scheduler_type: cosine
max_samples: 10000
max_steps: '50'
model_name_or_path: ../../models/Qwen-7B
num_train_epochs: 10.0
output_dir: ./results/lora_sft_Qwen-7B_4_gpu_50_step_20240905053758
overwrite_cache: true
overwrite_output_dir: true
per_device_eval_batch_size: 2
per_device_train_batch_size: 2
plot_loss: true
preprocessing_num_workers: 16
save_steps: 500
stage: sft
template: qwen
val_size: 0.1
warmup_ratio: 0.1

@@ -0,0 +1,31 @@
bf16: true
cutoff_len: 1024
dataset: belle_1m
ddp_timeout: 180000000
do_train: true
eval_steps: 500
eval_strategy: steps
finetuning_type: lora
gradient_accumulation_steps: 8
include_num_input_tokens_seen: true
include_tokens_per_second: true
learning_rate: 0.0001
logging_steps: 3
lora_target: all
lr_scheduler_type: cosine
max_samples: 10000
max_steps: '50'
model_name_or_path: ../../models/Qwen-7B
num_train_epochs: 10.0
output_dir: ./results/lora_sft_Qwen-7B_4_gpu_50_step_20240905064243
overwrite_cache: true
overwrite_output_dir: true
per_device_eval_batch_size: 2
per_device_train_batch_size: 2
plot_loss: true
preprocessing_num_workers: 16
save_steps: 500
stage: sft
template: qwen
val_size: 0.1
warmup_ratio: 0.1
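
include_num_input_tokens_seen and include_tokens_per_second switch on the HF Trainer token counters these test results rely on. For this config the per-step token count is bounded by the effective batch times cutoff_len; a short sketch makes the ceiling explicit (num_gpus again assumed from the run name).

effective_batch = 2 * 8 * 4          # per-device batch * grad accumulation * assumed 4 GPUs
cutoff_len = 1024
print(effective_batch * cutoff_len)  # 65536 tokens per optimizer step, at most;
# shorter belle_1m samples make the counted number lower in practice.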

@@ -0,0 +1,31 @@
bf16: true
cutoff_len: 1024
dataset: belle_1m
ddp_timeout: 180000000
do_train: true
eval_steps: 500
eval_strategy: steps
finetuning_type: lora
gradient_accumulation_steps: 8
include_num_input_tokens_seen: true
include_tokens_per_second: true
learning_rate: 0.0001
logging_steps: 3
lora_target: all
lr_scheduler_type: cosine
max_samples: 10000
max_steps: '50'
model_name_or_path: ../../models/Qwen-7B
num_train_epochs: 10.0
output_dir: ./results/lora_sft_Qwen-7B_4_gpu_50_step_20240905064736
overwrite_cache: true
overwrite_output_dir: true
per_device_eval_batch_size: 2
per_device_train_batch_size: 2
plot_loss: true
preprocessing_num_workers: 16
save_steps: 500
stage: sft
template: qwen
val_size: 0.1
warmup_ratio: 0.1
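
Once a run finishes, the adapter written to output_dir can be attached back onto the base model. A minimal sketch with transformers and peft, assuming the standard PEFT adapter layout in output_dir (with save_steps at 500 and max_steps at 50, only the final save lands there):

from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

# Paths mirror the config above; Qwen-7B ships custom modeling code,
# hence trust_remote_code=True.
base = AutoModelForCausalLM.from_pretrained(
    "../../models/Qwen-7B", trust_remote_code=True
)
model = PeftModel.from_pretrained(
    base, "./results/lora_sft_Qwen-7B_4_gpu_50_step_20240905064736"
)
tokenizer = AutoTokenizer.from_pretrained(
    "../../models/Qwen-7B", trust_remote_code=True
)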