diff --git a/src/llmtuner/train/ppo/trainer.py b/src/llmtuner/train/ppo/trainer.py
index e6c3d0e3..1bba733b 100644
--- a/src/llmtuner/train/ppo/trainer.py
+++ b/src/llmtuner/train/ppo/trainer.py
@@ -298,7 +298,8 @@ class CustomPPOTrainer(PPOTrainer, Trainer):
             with torch.cuda.amp.autocast(dtype=self.model_args.compute_dtype):  # support bf16
                 logits, _, values = model(**input_kwargs)
 
-            if getattr(model.config, "model_type", None) == "chatglm":
+            unwrapped_model: "AutoModelForCausalLMWithValueHead" = self.accelerator.unwrap_model(self.model)
+            if getattr(unwrapped_model.config, "model_type", None) == "chatglm":
                 values = torch.transpose(values, 0, 1)
 
             logprobs = logprobs_from_logits(logits[:, :-1, :], input_ids[:, 1:])
diff --git a/src/llmtuner/train/rm/trainer.py b/src/llmtuner/train/rm/trainer.py
index 9be64264..b018a8c4 100644
--- a/src/llmtuner/train/rm/trainer.py
+++ b/src/llmtuner/train/rm/trainer.py
@@ -40,7 +40,8 @@ class PairwiseTrainer(Trainer):
 
         # Compute rewards
         _, _, values = model(**inputs, output_hidden_states=True, return_dict=True)
-        if getattr(model.config, "model_type", None) == "chatglm":
+        unwrapped_model: "PreTrainedModel" = self.accelerator.unwrap_model(self.model)
+        if getattr(unwrapped_model.config, "model_type", None) == "chatglm":
             values = torch.transpose(values, 0, 1)
 
         # Split the inputs and rewards into two parts, chosen and rejected
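
Note on the pattern (not part of the patch): both hunks read `model_type` from `self.accelerator.unwrap_model(self.model)` rather than from the possibly wrapped `model`, presumably because wrappers handed back by accelerate (e.g. DistributedDataParallel or the DeepSpeed engine) do not necessarily expose the underlying Hugging Face `config`. A minimal sketch of that pattern in isolation; `maybe_transpose_values` is a hypothetical helper introduced only for illustration:

```python
import torch
from accelerate import Accelerator


def maybe_transpose_values(accelerator: Accelerator, model: torch.nn.Module, values: torch.Tensor) -> torch.Tensor:
    # Hypothetical helper mirroring the patched logic: read ``model_type`` from the
    # unwrapped model, because accelerate may return a wrapper (DDP, DeepSpeed, ...)
    # that does not expose the original model's ``config`` attribute.
    unwrapped_model = accelerator.unwrap_model(model)
    if getattr(unwrapped_model.config, "model_type", None) == "chatglm":
        # ChatGLM value tensors need their first two dimensions swapped so they
        # line up with the layout produced by the other model types.
        values = torch.transpose(values, 0, 1)
    return values
```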