From ed025abba3fae87b4fdca0a031d236c9458cad86 Mon Sep 17 00:00:00 2001 From: anrongqiao <17710054230@163.com> Date: Thu, 1 Aug 2024 10:37:57 +0800 Subject: [PATCH] Fix single-dataset exhaustion error with 2B models --- FM_9G/fm9g/dragonfly/training_tasks/pretrain_indexed.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/FM_9G/fm9g/dragonfly/training_tasks/pretrain_indexed.py b/FM_9G/fm9g/dragonfly/training_tasks/pretrain_indexed.py index afee738..f247081 100644 --- a/FM_9G/fm9g/dragonfly/training_tasks/pretrain_indexed.py +++ b/FM_9G/fm9g/dragonfly/training_tasks/pretrain_indexed.py @@ -602,8 +602,7 @@ class MixedIndexedDataset(torch.utils.data.IterableDataset): idx = np.random.choice(len(self.weights), p=self.weights) data = next(self.tasks[idx]) - if step % self.update_weights_frequency == 0: - self.update_weights() + if data is None: if self.tasks[idx].allow_repeat: # _runtime_ave = self.tasks[idx].ave_tokens @@ -618,7 +617,7 @@ class MixedIndexedDataset(torch.utils.data.IterableDataset): self.tasks[idx].exhaust = True self.remain -= 1 continue - + if step % self.update_weights_frequency == 0: self.update_weights() step += 1