forked from p04798526/LLaMA-Factory-Mirror
fix #1668
This commit is contained in:
parent
a38dbf55e3
commit
1585962eb7
|
@@ -298,7 +298,8 @@ class CustomPPOTrainer(PPOTrainer, Trainer):
|
|||
with torch.cuda.amp.autocast(dtype=self.model_args.compute_dtype): # support bf16
|
||||
logits, _, values = model(**input_kwargs)
|
||||
|
||||
if getattr(model.config, "model_type", None) == "chatglm":
|
||||
unwrapped_model: "AutoModelForCausalLMWithValueHead" = self.accelerator.unwrap_model(self.model)
|
||||
if getattr(unwrapped_model.config, "model_type", None) == "chatglm":
|
||||
values = torch.transpose(values, 0, 1)
|
||||
|
||||
logprobs = logprobs_from_logits(logits[:, :-1, :], input_ids[:, 1:])
|
||||
|
|
|
@@ -40,7 +40,8 @@ class PairwiseTrainer(Trainer):
|
|||
# Compute rewards
|
||||
_, _, values = model(**inputs, output_hidden_states=True, return_dict=True)
|
||||
|
||||
if getattr(model.config, "model_type", None) == "chatglm":
|
||||
unwrapped_model: "PreTrainedModel" = self.accelerator.unwrap_model(self.model)
|
||||
if getattr(unwrapped_model.config, "model_type", None) == "chatglm":
|
||||
values = torch.transpose(values, 0, 1)
|
||||
|
||||
# Split the inputs and rewards into two parts, chosen and rejected
|
||||
|
|
Loading…
Reference in New Issue