diff --git a/src/llmtuner/extras/misc.py b/src/llmtuner/extras/misc.py
index cd2ff5bc..85761f1d 100644
--- a/src/llmtuner/extras/misc.py
+++ b/src/llmtuner/extras/misc.py
@@ -64,7 +64,7 @@ def check_dependencies() -> None:
     require_version("transformers>=4.37.2", "To fix: pip install transformers>=4.37.2")
     require_version("datasets>=2.14.3", "To fix: pip install datasets>=2.14.3")
     require_version("accelerate>=0.27.2", "To fix: pip install accelerate>=0.27.2")
-    require_version("peft>=0.9.0", "To fix: pip install peft>=0.9.0")
+    require_version("peft>=0.10.0", "To fix: pip install peft>=0.10.0")
     require_version("trl>=0.8.1", "To fix: pip install trl>=0.8.1")
diff --git a/src/llmtuner/hparams/finetuning_args.py b/src/llmtuner/hparams/finetuning_args.py
index 0dd28a8f..c1f08334 100644
--- a/src/llmtuner/hparams/finetuning_args.py
+++ b/src/llmtuner/hparams/finetuning_args.py
@@ -118,10 +118,6 @@ class RLHFArguments:
         default=4,
         metadata={"help": "The number of epochs to perform in a PPO optimization step."},
     )
-    ppo_logger: Optional[str] = field(
-        default=None,
-        metadata={"help": 'Log with either "wandb" or "tensorboard" in PPO training.'},
-    )
     ppo_score_norm: bool = field(
         default=False,
         metadata={"help": "Use score normalization in PPO training."},
diff --git a/src/llmtuner/hparams/parser.py b/src/llmtuner/hparams/parser.py
index 8e27f379..4fbc3db9 100644
--- a/src/llmtuner/hparams/parser.py
+++ b/src/llmtuner/hparams/parser.py
@@ -8,7 +8,6 @@ import transformers
 from transformers import HfArgumentParser, Seq2SeqTrainingArguments
 from transformers.trainer_utils import get_last_checkpoint
 from transformers.utils import is_torch_bf16_gpu_available
-from transformers.utils.versions import require_version
 
 from ..extras.logging import get_logger
 from ..extras.misc import check_dependencies
@@ -119,6 +118,13 @@ def get_train_args(args: Optional[Dict[str, Any]] = None) -> _TRAIN_CLS:
     if finetuning_args.stage == "ppo" and finetuning_args.reward_model_type == "lora" and model_args.use_unsloth:
         raise ValueError("Unsloth does not support lora reward model.")
 
+    if (
+        finetuning_args.stage == "ppo"
+        and training_args.report_to is not None
+        and training_args.report_to[0] not in ["wandb", "tensorboard"]
+    ):
+        raise ValueError("PPO only accepts wandb or tensorboard logger.")
+
     if training_args.max_steps == -1 and data_args.streaming:
         raise ValueError("Please specify `max_steps` in streaming mode.")
 
@@ -128,12 +134,8 @@ def get_train_args(args: Optional[Dict[str, Any]] = None) -> _TRAIN_CLS:
     if training_args.do_train and model_args.use_unsloth and not is_unsloth_available():
         raise ValueError("Unsloth was not installed: https://github.com/unslothai/unsloth")
 
-    if finetuning_args.use_dora:
-        if model_args.quantization_bit is not None:
-            require_version("peft>=0.10.0", "To fix: pip install peft>=0.10.0")
-
-        if model_args.use_unsloth:
-            raise ValueError("Unsloth does not support DoRA.")
+    if finetuning_args.use_dora and model_args.use_unsloth:
+        raise ValueError("Unsloth does not support DoRA.")
 
     if finetuning_args.pure_bf16:
         if not is_torch_bf16_gpu_available():
diff --git a/src/llmtuner/train/ppo/workflow.py b/src/llmtuner/train/ppo/workflow.py
index 658b244d..0e03086b 100644
--- a/src/llmtuner/train/ppo/workflow.py
+++ b/src/llmtuner/train/ppo/workflow.py
@@ -55,11 +55,11 @@ def run_ppo(
         seed=training_args.seed,
         optimize_device_cache=True,
         target=finetuning_args.ppo_target,
-        log_with=finetuning_args.ppo_logger,
         use_score_scaling=finetuning_args.ppo_score_norm,
         use_score_norm=finetuning_args.ppo_score_norm,
         whiten_rewards=finetuning_args.ppo_whiten_rewards,
         accelerator_kwargs={"step_scheduler_with_optimizer": False},
+        log_with=training_args.report_to[0] if training_args.report_to is not None else None,
         project_kwargs={"logging_dir": training_args.logging_dir},
     )
 
@@ -71,10 +71,10 @@ def run_ppo(
     num_training_steps = training_args.num_train_epochs * math.ceil(len(dataset) / total_train_batch_size)
 
     optimizer = create_custom_optimzer(model, training_args, finetuning_args)
-    create_custom_scheduler(training_args, num_training_steps, optimizer)
     if optimizer is None:
         optimizer = AdamW(filter(lambda p: p.requires_grad, model.parameters()), lr=training_args.learning_rate)
 
+    create_custom_scheduler(training_args, num_training_steps, optimizer)
     lr_scheduler = get_scheduler(
         training_args.lr_scheduler_type,
         optimizer=optimizer,