diff --git a/sft_code.sh b/sft_code.sh
deleted file mode 100644
index a7c5e1f..0000000
--- a/sft_code.sh
+++ /dev/null
@@ -1,154 +0,0 @@
-#!/bin/bash
-
-deepspeed --include localhost:0,1,2,3,4,5,6,7 --master_port 21666 src/train.py \
-    --stage sft \
-    --model_name_or_path /mnt/diskhd/Backup/DownloadModel/2b_sft_model/ \
-    --do_train \
-    --dataset TACO \
-    --template jiuge \
-    --finetuning_type full \
-    --output_dir TACO \
-    --per_device_train_batch_size 14 \
-    --gradient_accumulation_steps 6 \
-    --lr_scheduler_type cosine \
-    --logging_step 1 \
-    --save_steps 300 \
-    --lr_scheduler_type cosine_with_restarts \
-    --warmup_ratio 0.001 \
-    --optim adamw_torch \
-    --learning_rate 2e-5 \
-    --num_train_epochs 2.0 \
-    --plot_loss \
-    --bf16 \
-    --gradient_checkpointing \
-    --report_to tensorboard \
-    --deepspeed deepspeed_configs/zero2.json \
-    --cutoff_len 2048
-
-deepspeed --include localhost:0,1,2,3,4,5,6,7 --master_port 21666 src/train.py \
-    --stage sft \
-    --model_name_or_path /mnt/diskhd/Backup/DownloadModel/2b_sft_model/ \
-    --do_train \
-    --dataset Tested-143k-Python-Alpaca \
-    --template jiuge \
-    --finetuning_type full \
-    --output_dir Tested-143k-Python-Alpaca \
-    --per_device_train_batch_size 14 \
-    --gradient_accumulation_steps 6 \
-    --lr_scheduler_type cosine \
-    --logging_step 1 \
-    --save_steps 300 \
-    --lr_scheduler_type cosine_with_restarts \
-    --warmup_ratio 0.001 \
-    --optim adamw_torch \
-    --learning_rate 2e-5 \
-    --num_train_epochs 2.0 \
-    --plot_loss \
-    --bf16 \
-    --gradient_checkpointing \
-    --report_to tensorboard \
-    --deepspeed deepspeed_configs/zero2.json \
-    --cutoff_len 2048
-
-deepspeed --include localhost:0,1,2,3,4,5,6,7 --master_port 21666 src/train.py \
-    --stage sft \
-    --model_name_or_path /mnt/diskhd/Backup/DownloadModel/2b_sft_model/ \
-    --do_train \
-    --dataset UltraInteract_sft \
-    --template jiuge \
-    --finetuning_type full \
-    --output_dir UltraInteract_sft \
-    --per_device_train_batch_size 14 \
-    --gradient_accumulation_steps 6 \
-    --lr_scheduler_type cosine \
-    --logging_step 1 \
-    --save_steps 300 \
-    --lr_scheduler_type cosine_with_restarts \
-    --warmup_ratio 0.001 \
-    --optim adamw_torch \
-    --learning_rate 2e-5 \
-    --num_train_epochs 2.0 \
-    --plot_loss \
-    --bf16 \
-    --gradient_checkpointing \
-    --report_to tensorboard \
-    --deepspeed deepspeed_configs/zero2.json \
-    --cutoff_len 2048
-
-
-deepspeed --include localhost:0,1,2,3,4,5,6,7 --master_port 21666 src/train.py \
-    --stage sft \
-    --model_name_or_path /mnt/diskhd/Backup/DownloadModel/2b_sft_model/ \
-    --do_train \
-    --dataset code_instructions_120k_alpaca \
-    --template jiuge \
-    --finetuning_type full \
-    --output_dir code_instructions_120k_alpaca \
-    --per_device_train_batch_size 14 \
-    --gradient_accumulation_steps 6 \
-    --lr_scheduler_type cosine \
-    --logging_step 1 \
-    --save_steps 300 \
-    --lr_scheduler_type cosine_with_restarts \
-    --warmup_ratio 0.001 \
-    --optim adamw_torch \
-    --learning_rate 2e-5 \
-    --num_train_epochs 2.0 \
-    --plot_loss \
-    --bf16 \
-    --gradient_checkpointing \
-    --report_to tensorboard \
-    --deepspeed deepspeed_configs/zero2.json \
-    --cutoff_len 2048
-
-
-deepspeed --include localhost:0,1,2,3,4,5,6,7 --master_port 21666 src/train.py \
-    --stage sft \
-    --model_name_or_path /mnt/diskhd/Backup/DownloadModel/2b_sft_model/ \
-    --do_train \
-    --dataset CodeExercise-Python-27k \
-    --template jiuge \
-    --finetuning_type full \
-    --output_dir CodeExercise-Python-27k \
-    --per_device_train_batch_size 14 \
-    --gradient_accumulation_steps 6 \
-    --lr_scheduler_type cosine \
-    --logging_step 1 \
-    --save_steps 300 \
-    --lr_scheduler_type cosine_with_restarts \
-    --warmup_ratio 0.001 \
-    --optim adamw_torch \
-    --learning_rate 2e-5 \
-    --num_train_epochs 2.0 \
-    --plot_loss \
-    --bf16 \
-    --gradient_checkpointing \
-    --report_to tensorboard \
-    --deepspeed deepspeed_configs/zero2.json \
-    --cutoff_len 2048
-
-
-deepspeed --include localhost:0,1,2,3,4,5,6,7 --master_port 21666 src/train.py \
-    --stage sft \
-    --model_name_or_path /mnt/diskhd/Backup/DownloadModel/2b_sft_model/ \
-    --do_train \
-    --dataset CodeNet4Repair \
-    --template jiuge \
-    --finetuning_type full \
-    --output_dir CodeNet4Repair \
-    --per_device_train_batch_size 14 \
-    --gradient_accumulation_steps 6 \
-    --lr_scheduler_type cosine \
-    --logging_step 1 \
-    --save_steps 300 \
-    --lr_scheduler_type cosine_with_restarts \
-    --warmup_ratio 0.001 \
-    --optim adamw_torch \
-    --learning_rate 2e-5 \
-    --num_train_epochs 2.0 \
-    --plot_loss \
-    --bf16 \
-    --gradient_checkpointing \
-    --report_to tensorboard \
-    --deepspeed deepspeed_configs/zero2.json \
-    --cutoff_len 2048
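For the record, the six deleted invocations are identical except for `--dataset` and `--output_dir`, and each one passes `--lr_scheduler_type` twice (`cosine`, then `cosine_with_restarts`; with argparse-style parsing the last value wins). A minimal loop-based sketch of the same runs, assuming `src/train.py` still accepts this flag set and that the last-wins scheduler was the intended one:

```bash
#!/bin/bash
# Sketch only: consolidates the six deleted invocations into one loop.
# Assumes src/train.py accepts the same flags as in the deleted script.

DATASETS=(
    TACO
    Tested-143k-Python-Alpaca
    UltraInteract_sft
    code_instructions_120k_alpaca
    CodeExercise-Python-27k
    CodeNet4Repair
)

for DATASET in "${DATASETS[@]}"; do
    deepspeed --include localhost:0,1,2,3,4,5,6,7 --master_port 21666 src/train.py \
        --stage sft \
        --model_name_or_path /mnt/diskhd/Backup/DownloadModel/2b_sft_model/ \
        --do_train \
        --dataset "$DATASET" \
        --template jiuge \
        --finetuning_type full \
        --output_dir "$DATASET" \
        --per_device_train_batch_size 14 \
        --gradient_accumulation_steps 6 \
        --logging_step 1 \
        --save_steps 300 \
        --lr_scheduler_type cosine_with_restarts \
        --warmup_ratio 0.001 \
        --optim adamw_torch \
        --learning_rate 2e-5 \
        --num_train_epochs 2.0 \
        --plot_loss \
        --bf16 \
        --gradient_checkpointing \
        --report_to tensorboard \
        --deepspeed deepspeed_configs/zero2.json \
        --cutoff_len 2048
done
```

Note that `--logging_step 1` is kept exactly as spelled in the deleted script; Hugging Face-style `TrainingArguments` usually names this option `logging_steps`, so it is worth verifying against the project's own argument parser.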