diff --git a/src/llamafactory/data/loader.py b/src/llamafactory/data/loader.py
index 069ea199..720fa493 100644
--- a/src/llamafactory/data/loader.py
+++ b/src/llamafactory/data/loader.py
@@ -212,6 +212,7 @@ def get_dataset(
         if has_tokenized_data(data_args.tokenized_path):
             logger.warning("Loading dataset from disk will ignore other data arguments.")
             dataset_dict: "DatasetDict" = load_from_disk(data_args.tokenized_path)
+            logger.debug("Tokenized dataset contents: {}".format(dataset_dict))
             logger.info("Loaded tokenized dataset from {}.".format(data_args.tokenized_path))
 
             dataset_module: Dict[str, "Dataset"] = {}