#!/bin/bash
# Sequential full-parameter SFT runs of the same 2B base model on six code
# datasets. Each run launches src/train.py on all eight local GPUs through
# DeepSpeed (ZeRO stage 2) and writes checkpoints to an output directory
# named after its dataset.
# Effective batch size per optimizer step: 14 per GPU x 8 GPUs x 6
# gradient-accumulation steps = 672 sequences.

# Run 1/6: TACO
deepspeed --include localhost:0,1,2,3,4,5,6,7 --master_port 21666 src/train.py \
    --stage sft \
    --model_name_or_path /mnt/diskhd/Backup/DownloadModel/2b_sft_model/ \
    --do_train \
    --dataset TACO \
    --template jiuge \
    --finetuning_type full \
    --output_dir TACO \
    --per_device_train_batch_size 14 \
    --gradient_accumulation_steps 6 \
    --lr_scheduler_type cosine_with_restarts \
    --logging_steps 1 \
    --save_steps 300 \
    --warmup_ratio 0.001 \
    --optim adamw_torch \
    --learning_rate 2e-5 \
    --num_train_epochs 2.0 \
    --plot_loss \
    --bf16 \
    --gradient_checkpointing \
    --report_to tensorboard \
    --deepspeed deepspeed_configs/zero2.json \
    --cutoff_len 2048

# Run 2/6: Tested-143k-Python-Alpaca
deepspeed --include localhost:0,1,2,3,4,5,6,7 --master_port 21666 src/train.py \
    --stage sft \
    --model_name_or_path /mnt/diskhd/Backup/DownloadModel/2b_sft_model/ \
    --do_train \
    --dataset Tested-143k-Python-Alpaca \
    --template jiuge \
    --finetuning_type full \
    --output_dir Tested-143k-Python-Alpaca \
    --per_device_train_batch_size 14 \
    --gradient_accumulation_steps 6 \
    --lr_scheduler_type cosine_with_restarts \
    --logging_steps 1 \
    --save_steps 300 \
    --warmup_ratio 0.001 \
    --optim adamw_torch \
    --learning_rate 2e-5 \
    --num_train_epochs 2.0 \
    --plot_loss \
    --bf16 \
    --gradient_checkpointing \
    --report_to tensorboard \
    --deepspeed deepspeed_configs/zero2.json \
    --cutoff_len 2048

# Run 3/6: UltraInteract_sft
deepspeed --include localhost:0,1,2,3,4,5,6,7 --master_port 21666 src/train.py \
    --stage sft \
    --model_name_or_path /mnt/diskhd/Backup/DownloadModel/2b_sft_model/ \
    --do_train \
    --dataset UltraInteract_sft \
    --template jiuge \
    --finetuning_type full \
    --output_dir UltraInteract_sft \
    --per_device_train_batch_size 14 \
    --gradient_accumulation_steps 6 \
    --lr_scheduler_type cosine_with_restarts \
    --logging_steps 1 \
    --save_steps 300 \
    --warmup_ratio 0.001 \
    --optim adamw_torch \
    --learning_rate 2e-5 \
    --num_train_epochs 2.0 \
    --plot_loss \
    --bf16 \
    --gradient_checkpointing \
    --report_to tensorboard \
    --deepspeed deepspeed_configs/zero2.json \
    --cutoff_len 2048

# Run 4/6: code_instructions_120k_alpaca
deepspeed --include localhost:0,1,2,3,4,5,6,7 --master_port 21666 src/train.py \
    --stage sft \
    --model_name_or_path /mnt/diskhd/Backup/DownloadModel/2b_sft_model/ \
    --do_train \
    --dataset code_instructions_120k_alpaca \
    --template jiuge \
    --finetuning_type full \
    --output_dir code_instructions_120k_alpaca \
    --per_device_train_batch_size 14 \
    --gradient_accumulation_steps 6 \
    --lr_scheduler_type cosine_with_restarts \
    --logging_steps 1 \
    --save_steps 300 \
    --warmup_ratio 0.001 \
    --optim adamw_torch \
    --learning_rate 2e-5 \
    --num_train_epochs 2.0 \
    --plot_loss \
    --bf16 \
    --gradient_checkpointing \
    --report_to tensorboard \
    --deepspeed deepspeed_configs/zero2.json \
    --cutoff_len 2048

# Run 5/6: CodeExercise-Python-27k
deepspeed --include localhost:0,1,2,3,4,5,6,7 --master_port 21666 src/train.py \
    --stage sft \
    --model_name_or_path /mnt/diskhd/Backup/DownloadModel/2b_sft_model/ \
    --do_train \
    --dataset CodeExercise-Python-27k \
    --template jiuge \
    --finetuning_type full \
    --output_dir CodeExercise-Python-27k \
    --per_device_train_batch_size 14 \
    --gradient_accumulation_steps 6 \
    --lr_scheduler_type cosine_with_restarts \
    --logging_steps 1 \
    --save_steps 300 \
    --warmup_ratio 0.001 \
    --optim adamw_torch \
    --learning_rate 2e-5 \
    --num_train_epochs 2.0 \
    --plot_loss \
    --bf16 \
    --gradient_checkpointing \
    --report_to tensorboard \
    --deepspeed deepspeed_configs/zero2.json \
    --cutoff_len 2048

# Run 6/6: CodeNet4Repair
deepspeed --include localhost:0,1,2,3,4,5,6,7 --master_port 21666 src/train.py \
    --stage sft \
    --model_name_or_path /mnt/diskhd/Backup/DownloadModel/2b_sft_model/ \
    --do_train \
    --dataset CodeNet4Repair \
    --template jiuge \
    --finetuning_type full \
    --output_dir CodeNet4Repair \
    --per_device_train_batch_size 14 \
    --gradient_accumulation_steps 6 \
    --lr_scheduler_type cosine_with_restarts \
    --logging_steps 1 \
    --save_steps 300 \
    --warmup_ratio 0.001 \
    --optim adamw_torch \
    --learning_rate 2e-5 \
    --num_train_epochs 2.0 \
    --plot_loss \
    --bf16 \
    --gradient_checkpointing \
    --report_to tensorboard \
    --deepspeed deepspeed_configs/zero2.json \
    --cutoff_len 2048
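
# The six runs above differ only in the dataset name (reused as the output
# directory). An equivalent loop-based form is sketched below for reference;
# it is left commented out so that re-running this script does not train
# everything a second time.
#
# for DATASET in TACO Tested-143k-Python-Alpaca UltraInteract_sft \
#     code_instructions_120k_alpaca CodeExercise-Python-27k CodeNet4Repair; do
#     deepspeed --include localhost:0,1,2,3,4,5,6,7 --master_port 21666 src/train.py \
#         --stage sft \
#         --model_name_or_path /mnt/diskhd/Backup/DownloadModel/2b_sft_model/ \
#         --do_train \
#         --dataset "$DATASET" \
#         --template jiuge \
#         --finetuning_type full \
#         --output_dir "$DATASET" \
#         --per_device_train_batch_size 14 \
#         --gradient_accumulation_steps 6 \
#         --lr_scheduler_type cosine_with_restarts \
#         --logging_steps 1 \
#         --save_steps 300 \
#         --warmup_ratio 0.001 \
#         --optim adamw_torch \
#         --learning_rate 2e-5 \
#         --num_train_epochs 2.0 \
#         --plot_loss \
#         --bf16 \
#         --gradient_checkpointing \
#         --report_to tensorboard \
#         --deepspeed deepspeed_configs/zero2.json \
#         --cutoff_len 2048
# done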
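
# Every run expects a ZeRO stage-2 DeepSpeed config at deepspeed_configs/zero2.json.
# The original file is not shown here; the heredoc below is a minimal sketch of a
# typical ZeRO-2 config whose "auto" values defer batch size, gradient
# accumulation, clipping, and bf16 settings to the CLI training arguments.
# Treat the field values as assumptions, not the original file; the existence
# guard keeps it from overwriting a real config.
[ -f deepspeed_configs/zero2.json ] || {
    mkdir -p deepspeed_configs
    cat > deepspeed_configs/zero2.json <<'EOF'
{
  "train_batch_size": "auto",
  "train_micro_batch_size_per_gpu": "auto",
  "gradient_accumulation_steps": "auto",
  "gradient_clipping": "auto",
  "bf16": {
    "enabled": "auto"
  },
  "zero_optimization": {
    "stage": 2,
    "overlap_comm": true,
    "contiguous_gradients": true,
    "reduce_scatter": true
  }
}
EOF
}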