diff --git a/data/dataset_info.json b/data/dataset_info.json index d899f011..cc7613f6 100644 --- a/data/dataset_info.json +++ b/data/dataset_info.json @@ -98,7 +98,7 @@ "file_name": "wiki_demo.txt", "file_sha1": "b2288edb05b233e5b35250fd4b308a5fa21fa66d", "columns": { - "prompt": "instruction", + "prompt": "text", "query": "", "response": "", "history": "" diff --git a/src/utils/common.py b/src/utils/common.py index 30f226f8..4ba573e4 100644 --- a/src/utils/common.py +++ b/src/utils/common.py @@ -316,9 +316,6 @@ def prepare_data( max_samples_temp = min(len(dataset), max_samples) dataset = dataset.select(range(max_samples_temp)) - if dataset.column_names[0] == "text": # for plaintext (in pre-training) - dataset = dataset.rename_column("text", getattr(dataset_attr, "prompt_column")) - dummy_data = [None] * len(dataset) for column_name, target_name in [ ("prompt_column", "prompt"),