Fix single-dataset error with exhaust for 2B models

This commit is contained in:
anrongqiao 2024-08-01 10:37:57 +08:00
parent 441c79f807
commit ed025abba3
1 changed file with 2 additions and 3 deletions

View File

@@ -602,8 +602,7 @@ class MixedIndexedDataset(torch.utils.data.IterableDataset):
idx = np.random.choice(len(self.weights), p=self.weights)
data = next(self.tasks[idx])
if step % self.update_weights_frequency == 0:
self.update_weights()
if data is None:
if self.tasks[idx].allow_repeat:
# _runtime_ave = self.tasks[idx].ave_tokens
@@ -618,7 +617,7 @@ class MixedIndexedDataset(torch.utils.data.IterableDataset):
self.tasks[idx].exhaust = True
self.remain -= 1
continue
if step % self.update_weights_frequency == 0:
self.update_weights()
step += 1