From d75e377b0f6f3fd7c034676b81ddef3aab1d6901 Mon Sep 17 00:00:00 2001 From: hiyouga Date: Fri, 18 Aug 2023 13:07:35 +0800 Subject: [PATCH] tiny fix --- src/llmtuner/tuner/ppo/trainer.py | 11 ++++++++--- src/llmtuner/tuner/rm/trainer.py | 2 +- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/src/llmtuner/tuner/ppo/trainer.py b/src/llmtuner/tuner/ppo/trainer.py index ad2021f8..9358adf0 100644 --- a/src/llmtuner/tuner/ppo/trainer.py +++ b/src/llmtuner/tuner/ppo/trainer.py @@ -182,9 +182,13 @@ class PPOPeftTrainer(PPOTrainer, PeftTrainer): """ replace_model(unwrapped_model, target="reward") batch = self.prepare_model_inputs(queries, responses) - _, _, values = self.model(**batch, output_hidden_states=True, return_dict=True) - if values.size(0) != batch["input_ids"].size(0): # adapt chatglm2 + + with torch.cuda.amp.autocast(dtype=self.compute_dtype): # support bf16 + _, _, values = self.model(**batch, output_hidden_states=True, return_dict=True) + + if values.size(0) != batch["input_ids"].size(0): # adapt to chatglm2 values = torch.transpose(values, 0, 1) + rewards = [reward for reward in values[:, -1].float().detach().cpu()] # use fp32 type replace_model(unwrapped_model, target="default") return rewards @@ -220,7 +224,7 @@ class PPOPeftTrainer(PPOTrainer, PeftTrainer): with torch.cuda.amp.autocast(dtype=self.compute_dtype): # support bf16 logits, _, values = model(**input_kwargs) - if values.size(0) != input_ids.size(0): # adapt chatglm2 + if values.size(0) != input_ids.size(0): # adapt to chatglm2 values = torch.transpose(values, 0, 1) logprobs = logprobs_from_logits(logits[:, :-1, :], input_ids[:, 1:]) @@ -240,6 +244,7 @@ class PPOPeftTrainer(PPOTrainer, PeftTrainer): all_logits.append(logits) else: del logits + all_values.append(values) all_logprobs.append(logprobs) all_masks.append(masks) diff --git a/src/llmtuner/tuner/rm/trainer.py b/src/llmtuner/tuner/rm/trainer.py index 99b4b152..08feda78 100644 --- a/src/llmtuner/tuner/rm/trainer.py +++ b/src/llmtuner/tuner/rm/trainer.py @@ -42,7 +42,7 @@ class PairwisePeftTrainer(PeftTrainer): """ batch_size = inputs["input_ids"].size(0) // 2 _, _, values = model(**inputs, output_hidden_states=True, return_dict=True) - if values.size(0) != inputs["input_ids"].size(0): # adapt chatglm2 + if values.size(0) != inputs["input_ids"].size(0): # adapt to chatglm2 values = torch.transpose(values, 0, 1) r_accept, r_reject = values[:, -1].split(batch_size, dim=0) loss = -torch.log(torch.sigmoid(r_accept - r_reject)).mean()