From 24e68d29f2c411ef4ff4d147e45cda664f332b65 Mon Sep 17 00:00:00 2001
From: codemayq
Date: Thu, 24 Aug 2023 19:45:36 +0800
Subject: [PATCH] add text format dataset preview in webui

---
 src/llmtuner/webui/utils.py | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/src/llmtuner/webui/utils.py b/src/llmtuner/webui/utils.py
index 362fa008..d32d719c 100644
--- a/src/llmtuner/webui/utils.py
+++ b/src/llmtuner/webui/utils.py
@@ -6,6 +6,7 @@ import matplotlib.pyplot as plt
 from typing import TYPE_CHECKING, Any, Dict, Generator, List, Tuple
 from datetime import datetime
 
+from llmtuner.dsets.utils import EXT2TYPE
 from llmtuner.extras.ploting import smooth
 from llmtuner.tuner import export_model
 from llmtuner.webui.common import get_model_path, get_save_dir, DATA_CONFIG
@@ -50,8 +51,15 @@ def get_preview(dataset_dir: str, dataset: list) -> Tuple[int, list, Dict[str, A
     with open(os.path.join(dataset_dir, DATA_CONFIG), "r", encoding="utf-8") as f:
         dataset_info = json.load(f)
     data_file = dataset_info[dataset[0]]["file_name"]
-    with open(os.path.join(dataset_dir, data_file), "r", encoding="utf-8") as f:
-        data = json.load(f)
+    data = []
+    data_format = EXT2TYPE.get(data_file.split(".")[-1], None)
+    if data_format == "text":
+        with open(os.path.join(dataset_dir, data_file), "r", encoding="utf-8") as f:
+            for line in f:
+                data.append(line.strip())
+    elif data_format == "json":
+        with open(os.path.join(dataset_dir, data_file), "r", encoding="utf-8") as f:
+            data = json.load(f)
     return len(data), data[:2], gr.update(visible=True)
 
 