Add text-format dataset preview to the web UI

This commit is contained in:
codemayq 2023-08-24 19:45:36 +08:00
parent 2de1a7610a
commit 24e68d29f2
1 changed files with 10 additions and 2 deletions

View File

@ -6,6 +6,7 @@ import matplotlib.pyplot as plt
from typing import TYPE_CHECKING, Any, Dict, Generator, List, Tuple
from datetime import datetime
from llmtuner.dsets.utils import EXT2TYPE
from llmtuner.extras.ploting import smooth
from llmtuner.tuner import export_model
from llmtuner.webui.common import get_model_path, get_save_dir, DATA_CONFIG
@ -50,8 +51,15 @@ def get_preview(dataset_dir: str, dataset: list) -> Tuple[int, list, Dict[str, A
with open(os.path.join(dataset_dir, DATA_CONFIG), "r", encoding="utf-8") as f:
dataset_info = json.load(f)
data_file = dataset_info[dataset[0]]["file_name"]
with open(os.path.join(dataset_dir, data_file), "r", encoding="utf-8") as f:
data = json.load(f)
data = []
data_format = EXT2TYPE.get(data_file.split(".")[-1], None)
if data_format == "text":
with open(os.path.join(dataset_dir, data_file), "r", encoding="utf-8") as f:
for line in f:
data.append(line.strip())
elif data_format == "json":
with open(os.path.join(dataset_dir, data_file), "r", encoding="utf-8") as f:
data = json.load(f)
return len(data), data[:2], gr.update(visible=True)