forked from p04798526/LLaMA-Factory-Mirror
tiny fix
This commit is contained in:
parent
0c9fda01e3
commit
166c837b95
|
@@ -157,8 +157,8 @@ class PPOTrainerForLLaMA(PPOTrainer, PeftTrainer):
|
||||||
|
|
||||||
stats = self.step(queries, responses, rewards)
|
stats = self.step(queries, responses, rewards)
|
||||||
|
|
||||||
loss_meter.update(stats["ppo/loss/total"])
|
loss_meter.update(stats["ppo/loss/total"], n=len(rewards))
|
||||||
reward_meter.update(torch.tensor(rewards).sum().item(), n=len(rewards))
|
reward_meter.update(torch.stack(rewards).mean().item(), n=len(rewards))
|
||||||
|
|
||||||
if steps_trained == len_dataloader:
|
if steps_trained == len_dataloader:
|
||||||
dataiter = iter(self.dataloader)
|
dataiter = iter(self.dataloader)
|
||||||
|
|
Loading…
Reference in New Issue