diff --git a/sft_code.sh b/sft_code.sh
new file mode 100644
index 0000000..a7c5e1f
--- /dev/null
+++ b/sft_code.sh
@@ -0,0 +1,147 @@
+#!/bin/bash
+# Sequential full-parameter SFT runs of the 2B model on six code datasets,
+# one deepspeed invocation per dataset; checkpoints go to a per-dataset dir.
+
+deepspeed --include localhost:0,1,2,3,4,5,6,7 --master_port 21666 src/train.py \
+    --stage sft \
+    --model_name_or_path /mnt/diskhd/Backup/DownloadModel/2b_sft_model/ \
+    --do_train \
+    --dataset TACO \
+    --template jiuge \
+    --finetuning_type full \
+    --output_dir TACO \
+    --per_device_train_batch_size 14 \
+    --gradient_accumulation_steps 6 \
+    --lr_scheduler_type cosine_with_restarts \
+    --logging_steps 1 \
+    --save_steps 300 \
+    --warmup_ratio 0.001 \
+    --optim adamw_torch \
+    --learning_rate 2e-5 \
+    --num_train_epochs 2.0 \
+    --plot_loss \
+    --bf16 \
+    --gradient_checkpointing \
+    --report_to tensorboard \
+    --deepspeed deepspeed_configs/zero2.json \
+    --cutoff_len 2048
+
+deepspeed --include localhost:0,1,2,3,4,5,6,7 --master_port 21666 src/train.py \
+    --stage sft \
+    --model_name_or_path /mnt/diskhd/Backup/DownloadModel/2b_sft_model/ \
+    --do_train \
+    --dataset Tested-143k-Python-Alpaca \
+    --template jiuge \
+    --finetuning_type full \
+    --output_dir Tested-143k-Python-Alpaca \
+    --per_device_train_batch_size 14 \
+    --gradient_accumulation_steps 6 \
+    --lr_scheduler_type cosine_with_restarts \
+    --logging_steps 1 \
+    --save_steps 300 \
+    --warmup_ratio 0.001 \
+    --optim adamw_torch \
+    --learning_rate 2e-5 \
+    --num_train_epochs 2.0 \
+    --plot_loss \
+    --bf16 \
+    --gradient_checkpointing \
+    --report_to tensorboard \
+    --deepspeed deepspeed_configs/zero2.json \
+    --cutoff_len 2048
+
+deepspeed --include localhost:0,1,2,3,4,5,6,7 --master_port 21666 src/train.py \
+    --stage sft \
+    --model_name_or_path /mnt/diskhd/Backup/DownloadModel/2b_sft_model/ \
+    --do_train \
+    --dataset UltraInteract_sft \
+    --template jiuge \
+    --finetuning_type full \
+    --output_dir UltraInteract_sft \
+    --per_device_train_batch_size 14 \
+    --gradient_accumulation_steps 6 \
+    --lr_scheduler_type cosine_with_restarts \
+    --logging_steps 1 \
+    --save_steps 300 \
+    --warmup_ratio 0.001 \
+    --optim adamw_torch \
+    --learning_rate 2e-5 \
+    --num_train_epochs 2.0 \
+    --plot_loss \
+    --bf16 \
+    --gradient_checkpointing \
+    --report_to tensorboard \
+    --deepspeed deepspeed_configs/zero2.json \
+    --cutoff_len 2048
+
+deepspeed --include localhost:0,1,2,3,4,5,6,7 --master_port 21666 src/train.py \
+    --stage sft \
+    --model_name_or_path /mnt/diskhd/Backup/DownloadModel/2b_sft_model/ \
+    --do_train \
+    --dataset code_instructions_120k_alpaca \
+    --template jiuge \
+    --finetuning_type full \
+    --output_dir code_instructions_120k_alpaca \
+    --per_device_train_batch_size 14 \
+    --gradient_accumulation_steps 6 \
+    --lr_scheduler_type cosine_with_restarts \
+    --logging_steps 1 \
+    --save_steps 300 \
+    --warmup_ratio 0.001 \
+    --optim adamw_torch \
+    --learning_rate 2e-5 \
+    --num_train_epochs 2.0 \
+    --plot_loss \
+    --bf16 \
+    --gradient_checkpointing \
+    --report_to tensorboard \
+    --deepspeed deepspeed_configs/zero2.json \
+    --cutoff_len 2048
+
+deepspeed --include localhost:0,1,2,3,4,5,6,7 --master_port 21666 src/train.py \
+    --stage sft \
+    --model_name_or_path /mnt/diskhd/Backup/DownloadModel/2b_sft_model/ \
+    --do_train \
+    --dataset CodeExercise-Python-27k \
+    --template jiuge \
+    --finetuning_type full \
+    --output_dir CodeExercise-Python-27k \
+    --per_device_train_batch_size 14 \
+    --gradient_accumulation_steps 6 \
+    --lr_scheduler_type cosine_with_restarts \
+    --logging_steps 1 \
+    --save_steps 300 \
+    --warmup_ratio 0.001 \
+    --optim adamw_torch \
+    --learning_rate 2e-5 \
+    --num_train_epochs 2.0 \
+    --plot_loss \
+    --bf16 \
+    --gradient_checkpointing \
+    --report_to tensorboard \
+    --deepspeed deepspeed_configs/zero2.json \
+    --cutoff_len 2048
+
+deepspeed --include localhost:0,1,2,3,4,5,6,7 --master_port 21666 src/train.py \
+    --stage sft \
+    --model_name_or_path /mnt/diskhd/Backup/DownloadModel/2b_sft_model/ \
+    --do_train \
+    --dataset CodeNet4Repair \
+    --template jiuge \
+    --finetuning_type full \
+    --output_dir CodeNet4Repair \
+    --per_device_train_batch_size 14 \
+    --gradient_accumulation_steps 6 \
+    --lr_scheduler_type cosine_with_restarts \
+    --logging_steps 1 \
+    --save_steps 300 \
+    --warmup_ratio 0.001 \
+    --optim adamw_torch \
+    --learning_rate 2e-5 \
+    --num_train_epochs 2.0 \
+    --plot_loss \
+    --bf16 \
+    --gradient_checkpointing \
+    --report_to tensorboard \
+    --deepspeed deepspeed_configs/zero2.json \
+    --cutoff_len 2048
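Note: the six invocations above are identical except for --dataset and --output_dir. A minimal loop-based sketch of the same script (same flags as in the diff, with only the dataset list factored into a bash array) would avoid the sixfold duplication:

#!/bin/bash
# Sketch: drive the six identical SFT runs from one array instead of
# copy-pasting the command. Each dataset name doubles as the output dir,
# exactly as in sft_code.sh above.
datasets=(
    TACO
    Tested-143k-Python-Alpaca
    UltraInteract_sft
    code_instructions_120k_alpaca
    CodeExercise-Python-27k
    CodeNet4Repair
)

for ds in "${datasets[@]}"; do
    deepspeed --include localhost:0,1,2,3,4,5,6,7 --master_port 21666 src/train.py \
        --stage sft \
        --model_name_or_path /mnt/diskhd/Backup/DownloadModel/2b_sft_model/ \
        --do_train \
        --dataset "$ds" \
        --template jiuge \
        --finetuning_type full \
        --output_dir "$ds" \
        --per_device_train_batch_size 14 \
        --gradient_accumulation_steps 6 \
        --lr_scheduler_type cosine_with_restarts \
        --logging_steps 1 \
        --save_steps 300 \
        --warmup_ratio 0.001 \
        --optim adamw_torch \
        --learning_rate 2e-5 \
        --num_train_epochs 2.0 \
        --plot_loss \
        --bf16 \
        --gradient_checkpointing \
        --report_to tensorboard \
        --deepspeed deepspeed_configs/zero2.json \
        --cutoff_len 2048
done

Running the jobs sequentially also keeps the fixed --master_port 21666 safe, since only one deepspeed launcher is alive at a time; launching them concurrently with the same port would collide.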