Merge pull request #1553 from hannlp/hans

Change the default argument settings for PPO training
hoshi-hiyouga 2023-11-20 20:32:55 +08:00 committed by GitHub
commit 48211e3799
4 changed files with 9 additions and 1 deletion


@@ -313,6 +313,8 @@ CUDA_VISIBLE_DEVICES=0 python src/train_bash.py \
     --per_device_train_batch_size 2 \
     --gradient_accumulation_steps 4 \
     --lr_scheduler_type cosine \
+    --top_k 0 \
+    --top_p 0.9 \
     --logging_steps 10 \
     --save_steps 1000 \
     --learning_rate 1e-5 \
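The two added flags change how responses are sampled during the PPO rollout phase: `--top_k 0` disables top-k truncation entirely, so token selection is governed by nucleus sampling with `--top_p 0.9` alone. As a minimal sketch, assuming these flags end up as standard `transformers` sampling parameters (that mapping is not shown in this diff), the equivalent generation settings would be:

```python
# Sketch only: how --top_k 0 / --top_p 0.9 would translate into sampling settings
# for PPO rollouts, assuming they are forwarded as transformers generation kwargs.
from transformers import GenerationConfig

rollout_generation = GenerationConfig(
    do_sample=True,  # PPO rollouts sample from the policy rather than greedy-decode
    top_k=0,         # 0 disables top-k truncation
    top_p=0.9,       # keep the smallest token set whose cumulative probability >= 0.9
)
```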


@@ -313,6 +313,8 @@ CUDA_VISIBLE_DEVICES=0 python src/train_bash.py \
     --per_device_train_batch_size 2 \
     --gradient_accumulation_steps 4 \
     --lr_scheduler_type cosine \
+    --top_k 0 \
+    --top_p 0.9 \
     --logging_steps 10 \
     --save_steps 1000 \
     --learning_rate 1e-5 \


@@ -74,6 +74,10 @@ class RLHFArguments:
         default=None,
         metadata={"help": "Log with either 'wandb' or 'tensorboard' in PPO training."}
     )
+    ppo_epochs: Optional[int] = field(
+        default=4,
+        metadata={"help": "Number of optimisation epochs per batch of samples"},
+    )
     ppo_score_norm: Optional[bool] = field(
         default=False,
         metadata={"help": "Use score normalization in PPO training."}


@@ -45,7 +45,7 @@ def run_ppo(
         mini_batch_size=training_args.per_device_train_batch_size,
         batch_size=training_args.per_device_train_batch_size * training_args.gradient_accumulation_steps,
         gradient_accumulation_steps=training_args.gradient_accumulation_steps,
-        ppo_epochs=1,
+        ppo_epochs=finetuning_args.ppo_epochs,
         max_grad_norm=training_args.max_grad_norm,
         seed=training_args.seed,
         optimize_device_cache=True,
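Here `ppo_epochs` is the number of optimization passes TRL makes over each batch of sampled rollouts before the next batch is generated. It was previously hard-coded to 1; it now follows `finetuning_args.ppo_epochs`, whose new default of 4 matches TRL's own default. A sketch of the resulting configuration, assuming TRL's late-2023 `PPOConfig` API and illustrative values for the fields derived from the training arguments:

```python
# Sketch only: the PPOConfig produced by run_ppo with the new default,
# using illustrative values for the TrainingArguments-derived fields.
from trl import PPOConfig

ppo_config = PPOConfig(
    learning_rate=1e-5,
    mini_batch_size=2,            # per_device_train_batch_size
    batch_size=2 * 4,             # per_device_train_batch_size * gradient_accumulation_steps
    gradient_accumulation_steps=4,
    ppo_epochs=4,                 # previously hard-coded to 1, now finetuning_args.ppo_epochs
    max_grad_norm=1.0,
    seed=42,
    optimize_device_cache=True,
)
```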