add: add batch run scripts

wql 2024-08-18 14:02:58 +08:00
parent a0569cadda
commit f5b14a46be
50 changed files with 1525 additions and 3 deletions

batch_run.sh Normal file
View File

@ -0,0 +1,56 @@
FORCE_TORCHRUN=1 llamafactory-cli train results/lora_sft/Llama2-7B/llama2_lora_sft_1.yaml > results/lora_sft/Llama2-7B/llama2_lora_sft_1.log
FORCE_TORCHRUN=1 llamafactory-cli train results/lora_sft/Llama2-7B/llama2_lora_sft_2.yaml > results/lora_sft/Llama2-7B/llama2_lora_sft_2.log
FORCE_TORCHRUN=1 llamafactory-cli train results/lora_sft/Llama2-7B/llama2_lora_sft_3.yaml > results/lora_sft/Llama2-7B/llama2_lora_sft_3.log
CUDA_VISIBLE_DEVICES=0 llamafactory-cli train results/lora_sft/Llama2-7B/llama2_lora_sft_1_single.yaml > results/lora_sft/Llama2-7B/llama2_lora_sft_1_single.log
CUDA_VISIBLE_DEVICES=0 llamafactory-cli train results/lora_sft/Llama2-7B/llama2_lora_sft_2_single.yaml > results/lora_sft/Llama2-7B/llama2_lora_sft_2_single.log
CUDA_VISIBLE_DEVICES=0 llamafactory-cli train results/lora_sft/Llama2-7B/llama2_lora_sft_3_single.yaml > results/lora_sft/Llama2-7B/llama2_lora_sft_3_single.log
FORCE_TORCHRUN=1 llamafactory-cli train results/inference/Llama2-7B/llama2_predict_1.yaml > results/inference/Llama2-7B/llama2_predict_1.log
FORCE_TORCHRUN=1 llamafactory-cli train results/inference/Llama2-7B/llama2_predict_2.yaml > results/inference/Llama2-7B/llama2_predict_2.log
FORCE_TORCHRUN=1 llamafactory-cli train results/inference/Llama2-7B/llama2_predict_3.yaml > results/inference/Llama2-7B/llama2_predict_3.log
CUDA_VISIBLE_DEVICES=0 llamafactory-cli train results/inference/Llama2-7B/llama2_predict_1_single.yaml > results/inference/Llama2-7B/llama2_predict_1_single.log
CUDA_VISIBLE_DEVICES=0 llamafactory-cli train results/inference/Llama2-7B/llama2_predict_2_single.yaml > results/inference/Llama2-7B/llama2_predict_2_single.log
CUDA_VISIBLE_DEVICES=0 llamafactory-cli train results/inference/Llama2-7B/llama2_predict_3_single.yaml > results/inference/Llama2-7B/llama2_predict_3_single.log
FORCE_TORCHRUN=1 llamafactory-cli train results/lora_sft/Baichuan2-7B/Baichuan2_lora_sft_1.yaml > results/lora_sft/Baichuan2-7B/Baichuan2_lora_sft_1.log
FORCE_TORCHRUN=1 llamafactory-cli train results/lora_sft/Baichuan2-7B/Baichuan2_lora_sft_2.yaml > results/lora_sft/Baichuan2-7B/Baichuan2_lora_sft_2.log
FORCE_TORCHRUN=1 llamafactory-cli train results/lora_sft/Baichuan2-7B/Baichuan2_lora_sft_3.yaml > results/lora_sft/Baichuan2-7B/Baichuan2_lora_sft_3.log
CUDA_VISIBLE_DEVICES=0 llamafactory-cli train results/lora_sft/Baichuan2-7B/Baichuan2_lora_sft_1_single.yaml > results/lora_sft/Baichuan2-7B/Baichuan2_lora_sft_1_single.log
CUDA_VISIBLE_DEVICES=0 llamafactory-cli train results/lora_sft/Baichuan2-7B/Baichuan2_lora_sft_2_single.yaml > results/lora_sft/Baichuan2-7B/Baichuan2_lora_sft_2_single.log
CUDA_VISIBLE_DEVICES=0 llamafactory-cli train results/lora_sft/Baichuan2-7B/Baichuan2_lora_sft_3_single.yaml > results/lora_sft/Baichuan2-7B/Baichuan2_lora_sft_3_single.log
FORCE_TORCHRUN=1 llamafactory-cli train results/lora_sft/ChatGLM2-6B/ChatGLM2_lora_sft_1.yaml > results/lora_sft/ChatGLM2-6B/ChatGLM2_lora_sft_1.log
FORCE_TORCHRUN=1 llamafactory-cli train results/lora_sft/ChatGLM2-6B/ChatGLM2_lora_sft_2.yaml > results/lora_sft/ChatGLM2-6B/ChatGLM2_lora_sft_2.log
FORCE_TORCHRUN=1 llamafactory-cli train results/lora_sft/ChatGLM2-6B/ChatGLM2_lora_sft_3.yaml > results/lora_sft/ChatGLM2-6B/ChatGLM2_lora_sft_3.log
CUDA_VISIBLE_DEVICES=0 llamafactory-cli train results/lora_sft/ChatGLM2-6B/ChatGLM2_lora_sft_1_single.yaml > results/lora_sft/ChatGLM2-6B/ChatGLM2_lora_sft_1_single.log
CUDA_VISIBLE_DEVICES=0 llamafactory-cli train results/lora_sft/ChatGLM2-6B/ChatGLM2_lora_sft_2_single.yaml > results/lora_sft/ChatGLM2-6B/ChatGLM2_lora_sft_2_single.log
CUDA_VISIBLE_DEVICES=0 llamafactory-cli train results/lora_sft/ChatGLM2-6B/ChatGLM2_lora_sft_3_single.yaml > results/lora_sft/ChatGLM2-6B/ChatGLM2_lora_sft_3_single.log
FORCE_TORCHRUN=1 llamafactory-cli train results/lora_sft/Qwen-7B/Qwen_lora_sft_1.yaml > results/lora_sft/Qwen-7B/Qwen_lora_sft_1.log
FORCE_TORCHRUN=1 llamafactory-cli train results/lora_sft/Qwen-7B/Qwen_lora_sft_2.yaml > results/lora_sft/Qwen-7B/Qwen_lora_sft_2.log
FORCE_TORCHRUN=1 llamafactory-cli train results/lora_sft/Qwen-7B/Qwen_lora_sft_3.yaml > results/lora_sft/Qwen-7B/Qwen_lora_sft_3.log
CUDA_VISIBLE_DEVICES=0 llamafactory-cli train results/lora_sft/Qwen-7B/Qwen_lora_sft_1_single.yaml > results/lora_sft/Qwen-7B/Qwen_lora_sft_1_single.log
CUDA_VISIBLE_DEVICES=0 llamafactory-cli train results/lora_sft/Qwen-7B/Qwen_lora_sft_2_single.yaml > results/lora_sft/Qwen-7B/Qwen_lora_sft_2_single.log
CUDA_VISIBLE_DEVICES=0 llamafactory-cli train results/lora_sft/Qwen-7B/Qwen_lora_sft_3_single.yaml > results/lora_sft/Qwen-7B/Qwen_lora_sft_3_single.log
FORCE_TORCHRUN=1 llamafactory-cli train results/inference/Qwen-7B/Qwen_predict_1.yaml > results/inference/Qwen-7B/Qwen_predict_1.log
FORCE_TORCHRUN=1 llamafactory-cli train results/inference/Qwen-7B/Qwen_predict_2.yaml > results/inference/Qwen-7B/Qwen_predict_2.log
FORCE_TORCHRUN=1 llamafactory-cli train results/inference/Qwen-7B/Qwen_predict_3.yaml > results/inference/Qwen-7B/Qwen_predict_3.log
CUDA_VISIBLE_DEVICES=0 llamafactory-cli train results/inference/Qwen-7B/Qwen_predict_1_single.yaml > results/inference/Qwen-7B/Qwen_predict_1_single.log
CUDA_VISIBLE_DEVICES=0 llamafactory-cli train results/inference/Qwen-7B/Qwen_predict_2_single.yaml > results/inference/Qwen-7B/Qwen_predict_2_single.log
CUDA_VISIBLE_DEVICES=0 llamafactory-cli train results/inference/Qwen-7B/Qwen_predict_3_single.yaml > results/inference/Qwen-7B/Qwen_predict_3_single.log
FORCE_TORCHRUN=1 llamafactory-cli train results/inference/ChatGLM2-6B/ChatGLM2_predict_1.yaml > results/inference/ChatGLM2-6B/ChatGLM2_predict_1.log
FORCE_TORCHRUN=1 llamafactory-cli train results/inference/ChatGLM2-6B/ChatGLM2_predict_2.yaml > results/inference/ChatGLM2-6B/ChatGLM2_predict_2.log
FORCE_TORCHRUN=1 llamafactory-cli train results/inference/ChatGLM2-6B/ChatGLM2_predict_3.yaml > results/inference/ChatGLM2-6B/ChatGLM2_predict_3.log
CUDA_VISIBLE_DEVICES=0 llamafactory-cli train results/inference/ChatGLM2-6B/ChatGLM2_predict_1_single.yaml > results/inference/ChatGLM2-6B/ChatGLM2_predict_1_single.log
CUDA_VISIBLE_DEVICES=0 llamafactory-cli train results/inference/ChatGLM2-6B/ChatGLM2_predict_2_single.yaml > results/inference/ChatGLM2-6B/ChatGLM2_predict_2_single.log
CUDA_VISIBLE_DEVICES=0 llamafactory-cli train results/inference/ChatGLM2-6B/ChatGLM2_predict_3_single.yaml > results/inference/ChatGLM2-6B/ChatGLM2_predict_3_single.log
FORCE_TORCHRUN=1 llamafactory-cli train results/inference/Baichuan2-7B/Baichuan2_predict_1.yaml > results/inference/Baichuan2-7B/Baichuan2_predict_1.log
FORCE_TORCHRUN=1 llamafactory-cli train results/inference/Baichuan2-7B/Baichuan2_predict_2.yaml > results/inference/Baichuan2-7B/Baichuan2_predict_2.log
FORCE_TORCHRUN=1 llamafactory-cli train results/inference/Baichuan2-7B/Baichuan2_predict_3.yaml > results/inference/Baichuan2-7B/Baichuan2_predict_3.log
CUDA_VISIBLE_DEVICES=0 llamafactory-cli train results/inference/Baichuan2-7B/Baichuan2_predict_1_single.yaml > results/inference/Baichuan2-7B/Baichuan2_predict_1_single.log
CUDA_VISIBLE_DEVICES=0 llamafactory-cli train results/inference/Baichuan2-7B/Baichuan2_predict_2_single.yaml > results/inference/Baichuan2-7B/Baichuan2_predict_2_single.log
CUDA_VISIBLE_DEVICES=0 llamafactory-cli train results/inference/Baichuan2-7B/Baichuan2_predict_3_single.yaml > results/inference/Baichuan2-7B/Baichuan2_predict_3_single.log
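
Note: the 48 commands above differ only in model directory, task (lora_sft vs. inference), run index, and single-GPU vs. torchrun mode. A loop-based variant could generate the same set of runs with less repetition; the sketch below is illustrative only and is not part of this commit (the prefix table, the run helper, the 2>&1 stderr capture, and the run ordering are assumptions).

#!/bin/bash
# Sketch only: regenerate the run list above from a model/prefix table.
# Assumes the results/<task>/<ModelDir>/<stem>.yaml layout used in this repo;
# 2>&1 (capturing training output written to stderr) is an addition here.
declare -A PREFIX=(
  [Llama2-7B]=llama2
  [Baichuan2-7B]=Baichuan2
  [ChatGLM2-6B]=ChatGLM2
  [Qwen-7B]=Qwen
)
run() {  # run <task> <model dir> <config stem>
  local base="results/$1/$2/$3"
  if [[ $3 == *_single ]]; then
    CUDA_VISIBLE_DEVICES=0 llamafactory-cli train "${base}.yaml" > "${base}.log" 2>&1
  else
    FORCE_TORCHRUN=1 llamafactory-cli train "${base}.yaml" > "${base}.log" 2>&1
  fi
}
for model in "${!PREFIX[@]}"; do
  p=${PREFIX[$model]}
  for i in 1 2 3; do
    for mode in "" _single; do
      run lora_sft  "$model" "${p}_lora_sft_${i}${mode}"
      run inference "$model" "${p}_predict_${i}${mode}"
    done
  done
done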

View File

@ -0,0 +1,39 @@
### model
model_name_or_path: modelscope/Llama-2-7b-ms
### method
stage: sft
do_train: true
finetuning_type: full
deepspeed: examples/deepspeed/ds_z3_config.json
### dataset
dataset: belle_1m
template: llama2
cutoff_len: 1024
max_samples: 10000
overwrite_cache: true
preprocessing_num_workers: 16
### output
output_dir: ./results/full/Llama2-7B/llama2_full_1
logging_steps: 3
save_steps: 100
plot_loss: true
overwrite_output_dir: true
### train
per_device_train_batch_size: 2
gradient_accumulation_steps: 8
learning_rate: 1.0e-5
num_train_epochs: 10.0
lr_scheduler_type: cosine
warmup_ratio: 0.1
bf16: true
ddp_timeout: 180000000
### eval
val_size: 0.1
per_device_eval_batch_size: 2
eval_strategy: steps
eval_steps: 500

View File

@ -0,0 +1,22 @@
### model
model_name_or_path: baichuan-inc/baichuan-7B
### method
do_predict: true
### dataset
eval_dataset: alpaca_gpt4_zh
template: baichuan
cutoff_len: 1024
max_samples: 50
overwrite_cache: true
preprocessing_num_workers: 16
### output
output_dir: ./results/inference/Baichuan2-7B/Baichuan2_predict_1
overwrite_output_dir: true
### eval
per_device_eval_batch_size: 2
predict_with_generate: true
ddp_timeout: 180000000

View File

@ -0,0 +1,22 @@
### model
model_name_or_path: baichuan-inc/baichuan-7B
### method
do_predict: true
### dataset
eval_dataset: alpaca_gpt4_zh
template: baichuan
cutoff_len: 1024
max_samples: 50
overwrite_cache: true
preprocessing_num_workers: 16
### output
output_dir: ./results/inference/Baichuan2-7B/Baichuan2_predict_1_single
overwrite_output_dir: true
### eval
per_device_eval_batch_size: 2
predict_with_generate: true
ddp_timeout: 180000000

View File

@ -0,0 +1,22 @@
### model
model_name_or_path: baichuan-inc/baichuan-7B
### method
do_predict: true
### dataset
eval_dataset: alpaca_gpt4_zh
template: baichuan
cutoff_len: 1024
max_samples: 50
overwrite_cache: true
preprocessing_num_workers: 16
### output
output_dir: ./results/inference/Baichuan2-7B/Baichuan2_predict_2
overwrite_output_dir: true
### eval
per_device_eval_batch_size: 2
predict_with_generate: true
ddp_timeout: 180000000

View File

@ -0,0 +1,22 @@
### model
model_name_or_path: baichuan-inc/baichuan-7B
### method
do_predict: true
### dataset
eval_dataset: alpaca_gpt4_zh
template: baichuan
cutoff_len: 1024
max_samples: 50
overwrite_cache: true
preprocessing_num_workers: 16
### output
output_dir: ./results/inference/Baichuan2-7B/Baichuan2_predict_2_single
overwrite_output_dir: true
### eval
per_device_eval_batch_size: 2
predict_with_generate: true
ddp_timeout: 180000000

View File

@ -0,0 +1,22 @@
### model
model_name_or_path: baichuan-inc/baichuan-7B
### method
do_predict: true
### dataset
eval_dataset: alpaca_gpt4_zh
template: baichuan
cutoff_len: 1024
max_samples: 50
overwrite_cache: true
preprocessing_num_workers: 16
### output
output_dir: ./results/inference/Baichuan2-7B/Baichuan2_predict_3
overwrite_output_dir: true
### eval
per_device_eval_batch_size: 2
predict_with_generate: true
ddp_timeout: 180000000

View File

@ -0,0 +1,22 @@
### model
model_name_or_path: baichuan-inc/baichuan-7B
### method
do_predict: true
### dataset
eval_dataset: alpaca_gpt4_zh
template: baichuan
cutoff_len: 1024
max_samples: 50
overwrite_cache: true
preprocessing_num_workers: 16
### output
output_dir: ./results/inference/Baichuan2-7B/Baichuan2_predict_3_single
overwrite_output_dir: true
### eval
per_device_eval_batch_size: 2
predict_with_generate: true
ddp_timeout: 180000000

View File

@ -0,0 +1,22 @@
### model
model_name_or_path: ZhipuAI/chatglm2-6b
### method
do_predict: true
### dataset
eval_dataset: alpaca_gpt4_zh
template: chatglm2
cutoff_len: 1024
max_samples: 50
overwrite_cache: true
preprocessing_num_workers: 16
### output
output_dir: ./results/inference/ChatGLM2-6B/ChatGLM2_predict_1
overwrite_output_dir: true
### eval
per_device_eval_batch_size: 2
predict_with_generate: true
ddp_timeout: 180000000

View File

@ -0,0 +1,22 @@
### model
model_name_or_path: ZhipuAI/chatglm2-6b
### method
do_predict: true
### dataset
eval_dataset: alpaca_gpt4_zh
template: chatglm2
cutoff_len: 1024
max_samples: 50
overwrite_cache: true
preprocessing_num_workers: 16
### output
output_dir: ./results/inference/ChatGLM2-6B/ChatGLM2_predict_1_single
overwrite_output_dir: true
### eval
per_device_eval_batch_size: 2
predict_with_generate: true
ddp_timeout: 180000000

View File

@ -0,0 +1,22 @@
### model
model_name_or_path: ZhipuAI/chatglm2-6b
### method
do_predict: true
### dataset
eval_dataset: alpaca_gpt4_zh
template: chatglm2
cutoff_len: 1024
max_samples: 50
overwrite_cache: true
preprocessing_num_workers: 16
### output
output_dir: ./results/inference/ChatGLM2-6B/ChatGLM2_predict_2
overwrite_output_dir: true
### eval
per_device_eval_batch_size: 2
predict_with_generate: true
ddp_timeout: 180000000

View File

@ -0,0 +1,22 @@
### model
model_name_or_path: ZhipuAI/chatglm2-6b
### method
do_predict: true
### dataset
eval_dataset: alpaca_gpt4_zh
template: chatglm2
cutoff_len: 1024
max_samples: 50
overwrite_cache: true
preprocessing_num_workers: 16
### output
output_dir: ./results/inference/ChatGLM2-6B/ChatGLM2_predict_2_single
overwrite_output_dir: true
### eval
per_device_eval_batch_size: 2
predict_with_generate: true
ddp_timeout: 180000000

View File

@ -0,0 +1,22 @@
### model
model_name_or_path: ZhipuAI/chatglm2-6b
### method
do_predict: true
### dataset
eval_dataset: alpaca_gpt4_zh
template: chatglm2
cutoff_len: 1024
max_samples: 50
overwrite_cache: true
preprocessing_num_workers: 16
### output
output_dir: ./results/inference/ChatGLM2-6B/ChatGLM2_predict_3
overwrite_output_dir: true
### eval
per_device_eval_batch_size: 2
predict_with_generate: true
ddp_timeout: 180000000

View File

@ -0,0 +1,22 @@
### model
model_name_or_path: ZhipuAI/chatglm2-6b
### method
do_predict: true
### dataset
eval_dataset: alpaca_gpt4_zh
template: chatglm2
cutoff_len: 1024
max_samples: 50
overwrite_cache: true
preprocessing_num_workers: 16
### output
output_dir: ./results/inference/ChatGLM2-6B/ChatGLM2_predict_3_single
overwrite_output_dir: true
### eval
per_device_eval_batch_size: 2
predict_with_generate: true
ddp_timeout: 180000000

View File

@ -13,7 +13,7 @@ overwrite_cache: true
preprocessing_num_workers: 16
### output
-output_dir: ./results/inference/Llama2-7B/Llama2-7B_inference_08_15_11_06
+output_dir: ./results/inference/Llama2-7B/llama2_predict_1
overwrite_output_dir: true
### eval

View File

@ -0,0 +1,22 @@
### model
model_name_or_path: modelscope/Llama-2-7b-ms
### method
do_predict: true
### dataset
eval_dataset: alpaca_gpt4_zh
template: llama2
cutoff_len: 1024
max_samples: 50
overwrite_cache: true
preprocessing_num_workers: 16
### output
output_dir: ./results/inference/Llama2-7B/llama2_predict_1_single
overwrite_output_dir: true
### eval
per_device_eval_batch_size: 2
predict_with_generate: true
ddp_timeout: 180000000

View File

@ -0,0 +1,22 @@
### model
model_name_or_path: modelscope/Llama-2-7b-ms
### method
do_predict: true
### dataset
eval_dataset: alpaca_gpt4_zh
template: llama2
cutoff_len: 1024
max_samples: 50
overwrite_cache: true
preprocessing_num_workers: 16
### output
output_dir: ./results/inference/Llama2-7B/llama2_predict_2
overwrite_output_dir: true
### eval
per_device_eval_batch_size: 2
predict_with_generate: true
ddp_timeout: 180000000

View File

@ -0,0 +1,22 @@
### model
model_name_or_path: modelscope/Llama-2-7b-ms
### method
do_predict: true
### dataset
eval_dataset: alpaca_gpt4_zh
template: llama2
cutoff_len: 1024
max_samples: 50
overwrite_cache: true
preprocessing_num_workers: 16
### output
output_dir: ./results/inference/Llama2-7B/llama2_predict_2_single
overwrite_output_dir: true
### eval
per_device_eval_batch_size: 2
predict_with_generate: true
ddp_timeout: 180000000

View File

@ -0,0 +1,22 @@
### model
model_name_or_path: modelscope/Llama-2-7b-ms
### method
do_predict: true
### dataset
eval_dataset: alpaca_gpt4_zh
template: llama2
cutoff_len: 1024
max_samples: 50
overwrite_cache: true
preprocessing_num_workers: 16
### output
output_dir: ./results/inference/Llama2-7B/llama2_predict_3
overwrite_output_dir: true
### eval
per_device_eval_batch_size: 2
predict_with_generate: true
ddp_timeout: 180000000

View File

@ -0,0 +1,22 @@
### model
model_name_or_path: modelscope/Llama-2-7b-ms
### method
do_predict: true
### dataset
eval_dataset: alpaca_gpt4_zh
template: llama2
cutoff_len: 1024
max_samples: 50
overwrite_cache: true
preprocessing_num_workers: 16
### output
output_dir: ./results/inference/Llama2-7B/llama2_predict_3_single
overwrite_output_dir: true
### eval
per_device_eval_batch_size: 2
predict_with_generate: true
ddp_timeout: 180000000

View File

@ -0,0 +1,22 @@
### model
model_name_or_path: qwen/Qwen-7B
### method
do_predict: true
### dataset
eval_dataset: alpaca_gpt4_zh
template: qwen
cutoff_len: 1024
max_samples: 50
overwrite_cache: true
preprocessing_num_workers: 16
### output
output_dir: ./results/inference/Qwen-7B/Qwen_predict_1
overwrite_output_dir: true
### eval
per_device_eval_batch_size: 2
predict_with_generate: true
ddp_timeout: 180000000

View File

@ -0,0 +1,22 @@
### model
model_name_or_path: qwen/Qwen-7B
### method
do_predict: true
### dataset
eval_dataset: alpaca_gpt4_zh
template: qwen
cutoff_len: 1024
max_samples: 50
overwrite_cache: true
preprocessing_num_workers: 16
### output
output_dir: ./results/inference/Qwen-7B/Qwen_predict_1_single
overwrite_output_dir: true
### eval
per_device_eval_batch_size: 2
predict_with_generate: true
ddp_timeout: 180000000

View File

@ -0,0 +1,22 @@
### model
model_name_or_path: qwen/Qwen-7B
### method
do_predict: true
### dataset
eval_dataset: alpaca_gpt4_zh
template: qwen
cutoff_len: 1024
max_samples: 50
overwrite_cache: true
preprocessing_num_workers: 16
### output
output_dir: ./results/inference/Qwen-7B/Qwen_predict_2
overwrite_output_dir: true
### eval
per_device_eval_batch_size: 2
predict_with_generate: true
ddp_timeout: 180000000

View File

@ -0,0 +1,22 @@
### model
model_name_or_path: qwen/Qwen-7B
### method
do_predict: true
### dataset
eval_dataset: alpaca_gpt4_zh
template: qwen
cutoff_len: 1024
max_samples: 50
overwrite_cache: true
preprocessing_num_workers: 16
### output
output_dir: ./results/inference/Qwen-7B/Qwen_predict_2_single
overwrite_output_dir: true
### eval
per_device_eval_batch_size: 2
predict_with_generate: true
ddp_timeout: 180000000

View File

@ -0,0 +1,22 @@
### model
model_name_or_path: qwen/Qwen-7B
### method
do_predict: true
### dataset
eval_dataset: alpaca_gpt4_zh
template: qwen
cutoff_len: 1024
max_samples: 50
overwrite_cache: true
preprocessing_num_workers: 16
### output
output_dir: ./results/inference/Qwen-7B/Qwen_predict_3
overwrite_output_dir: true
### eval
per_device_eval_batch_size: 2
predict_with_generate: true
ddp_timeout: 180000000

View File

@ -0,0 +1,22 @@
### model
model_name_or_path: qwen/Qwen-7B
### method
do_predict: true
### dataset
eval_dataset: alpaca_gpt4_zh
template: qwen
cutoff_len: 1024
max_samples: 50
overwrite_cache: true
preprocessing_num_workers: 16
### output
output_dir: ./results/inference/Qwen-7B/Qwen_predict_3_single
overwrite_output_dir: true
### eval
per_device_eval_batch_size: 2
predict_with_generate: true
ddp_timeout: 180000000

View File

@ -0,0 +1,40 @@
### model
model_name_or_path: baichuan-inc/baichuan-7B
### method
stage: sft
do_train: true
finetuning_type: lora
lora_target: all
### dataset
dataset: belle_1m
template: baichuan
cutoff_len: 1024
max_samples: 10000
overwrite_cache: true
preprocessing_num_workers: 16
### output
output_dir: ./results/lora_sft/Baichuan2-7B/Baichuan2_lora_sft_1
logging_steps: 3
save_steps: 100
plot_loss: true
overwrite_output_dir: true
### train
per_device_train_batch_size: 2
gradient_accumulation_steps: 8
learning_rate: 1.0e-4
num_train_epochs: 10.0
lr_scheduler_type: cosine
warmup_ratio: 0.1
fp16: true
ddp_timeout: 180000000
max_steps: 500
### eval
val_size: 0.1
per_device_eval_batch_size: 2
eval_strategy: steps
eval_steps: 500

View File

@ -0,0 +1,40 @@
### model
model_name_or_path: baichuan-inc/baichuan-7B
### method
stage: sft
do_train: true
finetuning_type: lora
lora_target: all
### dataset
dataset: belle_1m
template: baichuan
cutoff_len: 1024
max_samples: 10000
overwrite_cache: true
preprocessing_num_workers: 16
### output
output_dir: ./results/lora_sft/Baichuan2-7B/Baichuan2_lora_sft_1_single
logging_steps: 3
save_steps: 100
plot_loss: true
overwrite_output_dir: true
### train
per_device_train_batch_size: 2
gradient_accumulation_steps: 8
learning_rate: 1.0e-4
num_train_epochs: 10.0
lr_scheduler_type: cosine
warmup_ratio: 0.1
fp16: true
ddp_timeout: 180000000
max_steps: 500
### eval
val_size: 0.1
per_device_eval_batch_size: 2
eval_strategy: steps
eval_steps: 500

View File

@ -0,0 +1,40 @@
### model
model_name_or_path: baichuan-inc/baichuan-7B
### method
stage: sft
do_train: true
finetuning_type: lora
lora_target: all
### dataset
dataset: belle_1m
template: baichuan
cutoff_len: 1024
max_samples: 10000
overwrite_cache: true
preprocessing_num_workers: 16
### output
output_dir: ./results/lora_sft/Baichuan2-7B/Baichuan2_lora_sft_2
logging_steps: 3
save_steps: 100
plot_loss: true
overwrite_output_dir: true
### train
per_device_train_batch_size: 2
gradient_accumulation_steps: 8
learning_rate: 1.0e-4
num_train_epochs: 10.0
lr_scheduler_type: cosine
warmup_ratio: 0.1
fp16: true
ddp_timeout: 180000000
max_steps: 500
### eval
val_size: 0.1
per_device_eval_batch_size: 2
eval_strategy: steps
eval_steps: 500

View File

@ -0,0 +1,40 @@
### model
model_name_or_path: baichuan-inc/baichuan-7B
### method
stage: sft
do_train: true
finetuning_type: lora
lora_target: all
### dataset
dataset: belle_1m
template: baichuan
cutoff_len: 1024
max_samples: 10000
overwrite_cache: true
preprocessing_num_workers: 16
### output
output_dir: ./results/lora_sft/Baichuan2-7B/Baichuan2_lora_sft_2_single
logging_steps: 3
save_steps: 100
plot_loss: true
overwrite_output_dir: true
### train
per_device_train_batch_size: 2
gradient_accumulation_steps: 8
learning_rate: 1.0e-4
num_train_epochs: 10.0
lr_scheduler_type: cosine
warmup_ratio: 0.1
fp16: true
ddp_timeout: 180000000
max_steps: 500
### eval
val_size: 0.1
per_device_eval_batch_size: 2
eval_strategy: steps
eval_steps: 500

View File

@ -0,0 +1,40 @@
### model
model_name_or_path: baichuan-inc/baichuan-7B
### method
stage: sft
do_train: true
finetuning_type: lora
lora_target: all
### dataset
dataset: belle_1m
template: baichuan
cutoff_len: 1024
max_samples: 10000
overwrite_cache: true
preprocessing_num_workers: 16
### output
output_dir: ./results/lora_sft/Baichuan2-7B/Baichuan2_lora_sft_3
logging_steps: 3
save_steps: 100
plot_loss: true
overwrite_output_dir: true
### train
per_device_train_batch_size: 2
gradient_accumulation_steps: 8
learning_rate: 1.0e-4
num_train_epochs: 10.0
lr_scheduler_type: cosine
warmup_ratio: 0.1
fp16: true
ddp_timeout: 180000000
max_steps: 500
### eval
val_size: 0.1
per_device_eval_batch_size: 2
eval_strategy: steps
eval_steps: 500

View File

@ -0,0 +1,40 @@
### model
model_name_or_path: baichuan-inc/baichuan-7B
### method
stage: sft
do_train: true
finetuning_type: lora
lora_target: all
### dataset
dataset: belle_1m
template: baichuan
cutoff_len: 1024
max_samples: 10000
overwrite_cache: true
preprocessing_num_workers: 16
### output
output_dir: ./results/lora_sft/Baichuan2-7B/Baichuan2_lora_sft_3_single
logging_steps: 3
save_steps: 100
plot_loss: true
overwrite_output_dir: true
### train
per_device_train_batch_size: 2
gradient_accumulation_steps: 8
learning_rate: 1.0e-4
num_train_epochs: 10.0
lr_scheduler_type: cosine
warmup_ratio: 0.1
fp16: true
ddp_timeout: 180000000
max_steps: 500
### eval
val_size: 0.1
per_device_eval_batch_size: 2
eval_strategy: steps
eval_steps: 500

View File

@ -0,0 +1,40 @@
### model
model_name_or_path: ZhipuAI/chatglm2-6b
### method
stage: sft
do_train: true
finetuning_type: lora
lora_target: all
### dataset
dataset: belle_1m
template: chatglm2
cutoff_len: 1024
max_samples: 10000
overwrite_cache: true
preprocessing_num_workers: 16
### output
output_dir: ./results/lora_sft/ChatGLM2-6B/ChatGLM2_lora_sft_1
logging_steps: 3
save_steps: 100
plot_loss: true
overwrite_output_dir: true
### train
per_device_train_batch_size: 2
gradient_accumulation_steps: 8
learning_rate: 1.0e-4
num_train_epochs: 10.0
lr_scheduler_type: cosine
warmup_ratio: 0.1
fp16: true
ddp_timeout: 180000000
max_steps: 500
### eval
val_size: 0.1
per_device_eval_batch_size: 2
eval_strategy: steps
eval_steps: 500

View File

@ -0,0 +1,40 @@
### model
model_name_or_path: ZhipuAI/chatglm2-6b
### method
stage: sft
do_train: true
finetuning_type: lora
lora_target: all
### dataset
dataset: belle_1m
template: chatglm2
cutoff_len: 1024
max_samples: 10000
overwrite_cache: true
preprocessing_num_workers: 16
### output
output_dir: ./results/lora_sft/ChatGLM2-6B/ChatGLM2_lora_sft_1_single
logging_steps: 3
save_steps: 100
plot_loss: true
overwrite_output_dir: true
### train
per_device_train_batch_size: 2
gradient_accumulation_steps: 8
learning_rate: 1.0e-4
num_train_epochs: 10.0
lr_scheduler_type: cosine
warmup_ratio: 0.1
fp16: true
ddp_timeout: 180000000
max_steps: 500
### eval
val_size: 0.1
per_device_eval_batch_size: 2
eval_strategy: steps
eval_steps: 500

View File

@ -0,0 +1,40 @@
### model
model_name_or_path: ZhipuAI/chatglm2-6b
### method
stage: sft
do_train: true
finetuning_type: lora
lora_target: all
### dataset
dataset: belle_1m
template: chatglm2
cutoff_len: 1024
max_samples: 10000
overwrite_cache: true
preprocessing_num_workers: 16
### output
output_dir: ./results/lora_sft/ChatGLM2-6B/ChatGLM2_lora_sft_2
logging_steps: 3
save_steps: 100
plot_loss: true
overwrite_output_dir: true
### train
per_device_train_batch_size: 2
gradient_accumulation_steps: 8
learning_rate: 1.0e-4
num_train_epochs: 10.0
lr_scheduler_type: cosine
warmup_ratio: 0.1
fp16: true
ddp_timeout: 180000000
max_steps: 500
### eval
val_size: 0.1
per_device_eval_batch_size: 2
eval_strategy: steps
eval_steps: 500

View File

@ -0,0 +1,40 @@
### model
model_name_or_path: ZhipuAI/chatglm2-6b
### method
stage: sft
do_train: true
finetuning_type: lora
lora_target: all
### dataset
dataset: belle_1m
template: chatglm2
cutoff_len: 1024
max_samples: 10000
overwrite_cache: true
preprocessing_num_workers: 16
### output
output_dir: ./results/lora_sft/ChatGLM2-6B/ChatGLM2_lora_sft_2_single
logging_steps: 3
save_steps: 100
plot_loss: true
overwrite_output_dir: true
### train
per_device_train_batch_size: 2
gradient_accumulation_steps: 8
learning_rate: 1.0e-4
num_train_epochs: 10.0
lr_scheduler_type: cosine
warmup_ratio: 0.1
fp16: true
ddp_timeout: 180000000
max_steps: 500
### eval
val_size: 0.1
per_device_eval_batch_size: 2
eval_strategy: steps
eval_steps: 500

View File

@ -0,0 +1,40 @@
### model
model_name_or_path: ZhipuAI/chatglm2-6b
### method
stage: sft
do_train: true
finetuning_type: lora
lora_target: all
### dataset
dataset: belle_1m
template: chatglm2
cutoff_len: 1024
max_samples: 10000
overwrite_cache: true
preprocessing_num_workers: 16
### output
output_dir: ./results/lora_sft/ChatGLM2-6B/ChatGLM2_lora_sft_3
logging_steps: 3
save_steps: 100
plot_loss: true
overwrite_output_dir: true
### train
per_device_train_batch_size: 2
gradient_accumulation_steps: 8
learning_rate: 1.0e-4
num_train_epochs: 10.0
lr_scheduler_type: cosine
warmup_ratio: 0.1
fp16: true
ddp_timeout: 180000000
max_steps: 500
### eval
val_size: 0.1
per_device_eval_batch_size: 2
eval_strategy: steps
eval_steps: 500

View File

@ -0,0 +1,40 @@
### model
model_name_or_path: ZhipuAI/chatglm2-6b
### method
stage: sft
do_train: true
finetuning_type: lora
lora_target: all
### dataset
dataset: belle_1m
template: chatglm2
cutoff_len: 1024
max_samples: 10000
overwrite_cache: true
preprocessing_num_workers: 16
### output
output_dir: ./results/lora_sft/ChatGLM2-6B/ChatGLM2_lora_sft_3_single
logging_steps: 3
save_steps: 100
plot_loss: true
overwrite_output_dir: true
### train
per_device_train_batch_size: 2
gradient_accumulation_steps: 8
learning_rate: 1.0e-4
num_train_epochs: 10.0
lr_scheduler_type: cosine
warmup_ratio: 0.1
fp16: true
ddp_timeout: 180000000
max_steps: 500
### eval
val_size: 0.1
per_device_eval_batch_size: 2
eval_strategy: steps
eval_steps: 500

View File

@ -16,7 +16,7 @@ overwrite_cache: true
preprocessing_num_workers: 16
### output
-output_dir: ./results/lora_sft/Llama2-7B/Llama2-7B_lora_sft_08_15_11_01
+output_dir: ./results/lora_sft/Llama2-7B/llama2_lora_sft_1
logging_steps: 3
save_steps: 100
plot_loss: true
@ -29,8 +29,9 @@ learning_rate: 1.0e-4
num_train_epochs: 10.0
lr_scheduler_type: cosine
warmup_ratio: 0.1
-bf16: true
+fp16: true
ddp_timeout: 180000000
+max_steps: 500
### eval
val_size: 0.1

View File

@ -0,0 +1,40 @@
### model
model_name_or_path: modelscope/Llama-2-7b-ms
### method
stage: sft
do_train: true
finetuning_type: lora
lora_target: all
### dataset
dataset: belle_1m
template: llama2
cutoff_len: 1024
max_samples: 10000
overwrite_cache: true
preprocessing_num_workers: 16
### output
output_dir: ./results/lora_sft/Llama2-7B/llama2_lora_sft_1_single
logging_steps: 3
save_steps: 100
plot_loss: true
overwrite_output_dir: true
### train
per_device_train_batch_size: 2
gradient_accumulation_steps: 8
learning_rate: 1.0e-4
num_train_epochs: 10.0
lr_scheduler_type: cosine
warmup_ratio: 0.1
fp16: true
ddp_timeout: 180000000
max_steps: 500
### eval
val_size: 0.1
per_device_eval_batch_size: 2
eval_strategy: steps
eval_steps: 500

View File

@ -0,0 +1,40 @@
### model
model_name_or_path: modelscope/Llama-2-7b-ms
### method
stage: sft
do_train: true
finetuning_type: lora
lora_target: all
### dataset
dataset: belle_1m
template: llama2
cutoff_len: 1024
max_samples: 10000
overwrite_cache: true
preprocessing_num_workers: 16
### output
output_dir: ./results/lora_sft/Llama2-7B/llama2_lora_sft_2
logging_steps: 3
save_steps: 100
plot_loss: true
overwrite_output_dir: true
### train
per_device_train_batch_size: 2
gradient_accumulation_steps: 8
learning_rate: 1.0e-4
num_train_epochs: 10.0
lr_scheduler_type: cosine
warmup_ratio: 0.1
fp16: true
ddp_timeout: 180000000
max_steps: 500
### eval
val_size: 0.1
per_device_eval_batch_size: 2
eval_strategy: steps
eval_steps: 500

View File

@ -0,0 +1,40 @@
### model
model_name_or_path: modelscope/Llama-2-7b-ms
### method
stage: sft
do_train: true
finetuning_type: lora
lora_target: all
### dataset
dataset: belle_1m
template: llama2
cutoff_len: 1024
max_samples: 10000
overwrite_cache: true
preprocessing_num_workers: 16
### output
output_dir: ./results/lora_sft/Llama2-7B/llama2_lora_sft_2_single
logging_steps: 3
save_steps: 100
plot_loss: true
overwrite_output_dir: true
### train
per_device_train_batch_size: 2
gradient_accumulation_steps: 8
learning_rate: 1.0e-4
num_train_epochs: 10.0
lr_scheduler_type: cosine
warmup_ratio: 0.1
fp16: true
ddp_timeout: 180000000
max_steps: 500
### eval
val_size: 0.1
per_device_eval_batch_size: 2
eval_strategy: steps
eval_steps: 500

View File

@ -0,0 +1,40 @@
### model
model_name_or_path: modelscope/Llama-2-7b-ms
### method
stage: sft
do_train: true
finetuning_type: lora
lora_target: all
### dataset
dataset: belle_1m
template: llama2
cutoff_len: 1024
max_samples: 10000
overwrite_cache: true
preprocessing_num_workers: 16
### output
output_dir: ./results/lora_sft/Llama2-7B/llama2_lora_sft_3
logging_steps: 3
save_steps: 100
plot_loss: true
overwrite_output_dir: true
### train
per_device_train_batch_size: 2
gradient_accumulation_steps: 8
learning_rate: 1.0e-4
num_train_epochs: 10.0
lr_scheduler_type: cosine
warmup_ratio: 0.1
fp16: true
ddp_timeout: 180000000
max_steps: 500
### eval
val_size: 0.1
per_device_eval_batch_size: 2
eval_strategy: steps
eval_steps: 500

View File

@ -0,0 +1,40 @@
### model
model_name_or_path: modelscope/Llama-2-7b-ms
### method
stage: sft
do_train: true
finetuning_type: lora
lora_target: all
### dataset
dataset: belle_1m
template: llama2
cutoff_len: 1024
max_samples: 10000
overwrite_cache: true
preprocessing_num_workers: 16
### output
output_dir: ./results/lora_sft/Llama2-7B/llama2_lora_sft_3_single
logging_steps: 3
save_steps: 100
plot_loss: true
overwrite_output_dir: true
### train
per_device_train_batch_size: 2
gradient_accumulation_steps: 8
learning_rate: 1.0e-4
num_train_epochs: 10.0
lr_scheduler_type: cosine
warmup_ratio: 0.1
fp16: true
ddp_timeout: 180000000
max_steps: 500
### eval
val_size: 0.1
per_device_eval_batch_size: 2
eval_strategy: steps
eval_steps: 500

View File

@ -0,0 +1,40 @@
### model
model_name_or_path: qwen/Qwen-7B
### method
stage: sft
do_train: true
finetuning_type: lora
lora_target: all
### dataset
dataset: belle_1m
template: qwen
cutoff_len: 1024
max_samples: 10000
overwrite_cache: true
preprocessing_num_workers: 16
### output
output_dir: ./results/lora_sft/Qwen-7B/Qwen_lora_sft_1
logging_steps: 3
save_steps: 100
plot_loss: true
overwrite_output_dir: true
### train
per_device_train_batch_size: 2
gradient_accumulation_steps: 8
learning_rate: 1.0e-4
num_train_epochs: 10.0
lr_scheduler_type: cosine
warmup_ratio: 0.1
fp16: true
ddp_timeout: 180000000
max_steps: 500
### eval
val_size: 0.1
per_device_eval_batch_size: 2
eval_strategy: steps
eval_steps: 500

View File

@ -0,0 +1,40 @@
### model
model_name_or_path: qwen/Qwen-7B
### method
stage: sft
do_train: true
finetuning_type: lora
lora_target: all
### dataset
dataset: belle_1m
template: qwen
cutoff_len: 1024
max_samples: 10000
overwrite_cache: true
preprocessing_num_workers: 16
### output
output_dir: ./results/lora_sft/Qwen-7B/Qwen_lora_sft_1_single
logging_steps: 3
save_steps: 100
plot_loss: true
overwrite_output_dir: true
### train
per_device_train_batch_size: 2
gradient_accumulation_steps: 8
learning_rate: 1.0e-4
num_train_epochs: 10.0
lr_scheduler_type: cosine
warmup_ratio: 0.1
fp16: true
ddp_timeout: 180000000
max_steps: 500
### eval
val_size: 0.1
per_device_eval_batch_size: 2
eval_strategy: steps
eval_steps: 500

View File

@ -0,0 +1,40 @@
### model
model_name_or_path: qwen/Qwen-7B
### method
stage: sft
do_train: true
finetuning_type: lora
lora_target: all
### dataset
dataset: belle_1m
template: qwen
cutoff_len: 1024
max_samples: 10000
overwrite_cache: true
preprocessing_num_workers: 16
### output
output_dir: ./results/lora_sft/Qwen-7B/Qwen_lora_sft_2
logging_steps: 3
save_steps: 100
plot_loss: true
overwrite_output_dir: true
### train
per_device_train_batch_size: 2
gradient_accumulation_steps: 8
learning_rate: 1.0e-4
num_train_epochs: 10.0
lr_scheduler_type: cosine
warmup_ratio: 0.1
fp16: true
ddp_timeout: 180000000
max_steps: 500
### eval
val_size: 0.1
per_device_eval_batch_size: 2
eval_strategy: steps
eval_steps: 500

View File

@ -0,0 +1,40 @@
### model
model_name_or_path: qwen/Qwen-7B
### method
stage: sft
do_train: true
finetuning_type: lora
lora_target: all
### dataset
dataset: belle_1m
template: qwen
cutoff_len: 1024
max_samples: 10000
overwrite_cache: true
preprocessing_num_workers: 16
### output
output_dir: ./results/lora_sft/Qwen-7B/Qwen_lora_sft_2_single
logging_steps: 3
save_steps: 100
plot_loss: true
overwrite_output_dir: true
### train
per_device_train_batch_size: 2
gradient_accumulation_steps: 8
learning_rate: 1.0e-4
num_train_epochs: 10.0
lr_scheduler_type: cosine
warmup_ratio: 0.1
fp16: true
ddp_timeout: 180000000
max_steps: 500
### eval
val_size: 0.1
per_device_eval_batch_size: 2
eval_strategy: steps
eval_steps: 500

View File

@ -0,0 +1,40 @@
### model
model_name_or_path: qwen/Qwen-7B
### method
stage: sft
do_train: true
finetuning_type: lora
lora_target: all
### dataset
dataset: belle_1m
template: qwen
cutoff_len: 1024
max_samples: 10000
overwrite_cache: true
preprocessing_num_workers: 16
### output
output_dir: ./results/lora_sft/Qwen-7B/Qwen_lora_sft_3
logging_steps: 3
save_steps: 100
plot_loss: true
overwrite_output_dir: true
### train
per_device_train_batch_size: 2
gradient_accumulation_steps: 8
learning_rate: 1.0e-4
num_train_epochs: 10.0
lr_scheduler_type: cosine
warmup_ratio: 0.1
fp16: true
ddp_timeout: 180000000
max_steps: 500
### eval
val_size: 0.1
per_device_eval_batch_size: 2
eval_strategy: steps
eval_steps: 500

View File

@ -0,0 +1,40 @@
### model
model_name_or_path: qwen/Qwen-7B
### method
stage: sft
do_train: true
finetuning_type: lora
lora_target: all
### dataset
dataset: belle_1m
template: qwen
cutoff_len: 1024
max_samples: 10000
overwrite_cache: true
preprocessing_num_workers: 16
### output
output_dir: ./results/lora_sft/Qwen-7B/Qwen_lora_sft_3_single
logging_steps: 3
save_steps: 100
plot_loss: true
overwrite_output_dir: true
### train
per_device_train_batch_size: 2
gradient_accumulation_steps: 8
learning_rate: 1.0e-4
num_train_epochs: 10.0
lr_scheduler_type: cosine
warmup_ratio: 0.1
fp16: true
ddp_timeout: 180000000
max_steps: 500
### eval
val_size: 0.1
per_device_eval_batch_size: 2
eval_strategy: steps
eval_steps: 500