Add throughput and total-token entries to the training log

This commit is contained in:
faddddeout 2024-06-04 11:04:29 +00:00
parent 82a565362c
commit b2f0459542
2 changed files with 5 additions and 2 deletions

View File

@@ -170,12 +170,14 @@ class LogCallback(TrainerCallback):
 percentage=round(self.cur_steps / self.max_steps * 100, 2) if self.max_steps != 0 else 100,
 elapsed_time=self.elapsed_time,
 remaining_time=self.remaining_time,
+throughput="{:.2f}".format(state.num_input_tokens_seen / (time.time() - self.start_time)),
+total_tokens=state.num_input_tokens_seen,
 )
 logs = {k: v for k, v in logs.items() if v is not None}
 if self.webui_mode and all(key in logs for key in ["loss", "learning_rate", "epoch"]):
 logger.info(
-"{{'loss': {:.4f}, 'learning_rate': {:2.4e}, 'epoch': {:.2f}}}".format(
+"{{'loss': {:.4f}, 'learning_rate': {:2.4e}, 'epoch': {:.2f}, 'throughput': {}}}".format(
-logs["loss"], logs["learning_rate"], logs["epoch"]
+logs["loss"], logs["learning_rate"], logs["epoch"], logs["throughput"]
 )
 )

View File

@@ -132,6 +132,7 @@ class Runner:
 pure_bf16=(get("train.compute_type") == "pure_bf16"),
 plot_loss=True,
 ddp_timeout=180000000,
+include_num_input_tokens_seen=True,
 )
 # checkpoints