From c7a5620ccc72b7574255ea764693ccb866c48263 Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Thu, 13 Jun 2024 01:00:56 +0800 Subject: [PATCH] add neo-sft dataset --- README.md | 1 + README_zh.md | 1 + data/dataset_info.json | 4 ++++ 3 files changed, 6 insertions(+) diff --git a/README.md b/README.md index 994a62c6..5bbaf2d7 100644 --- a/README.md +++ b/README.md @@ -259,6 +259,7 @@ You also can add a custom chat template to [template.py](src/llamafactory/data/t - [Cosmopedia (en)](https://huggingface.co/datasets/HuggingFaceTB/cosmopedia) - [STEM (zh)](https://huggingface.co/datasets/hfl/stem_zh_instruction) - [Ruozhiba (zh)](https://huggingface.co/datasets/hfl/ruozhiba_gpt4_turbo) +- [Neo-sft (zh)](https://huggingface.co/datasets/m-a-p/neo_sft_phase2) - [LLaVA mixed (en&zh)](https://huggingface.co/datasets/BUAADreamer/llava-en-zh-300k) - [Open Assistant (de)](https://huggingface.co/datasets/mayflowergmbh/oasst_de) - [Dolly 15k (de)](https://huggingface.co/datasets/mayflowergmbh/dolly-15k_de) diff --git a/README_zh.md b/README_zh.md index fa395c6b..fb616909 100644 --- a/README_zh.md +++ b/README_zh.md @@ -259,6 +259,7 @@ https://github.com/hiyouga/LLaMA-Factory/assets/16256802/ec36a9dd-37f4-4f72-81bd - [Cosmopedia (en)](https://huggingface.co/datasets/HuggingFaceTB/cosmopedia) - [STEM (zh)](https://huggingface.co/datasets/hfl/stem_zh_instruction) - [Ruozhiba (zh)](https://huggingface.co/datasets/hfl/ruozhiba_gpt4_turbo) +- [Neo-sft (zh)](https://huggingface.co/datasets/m-a-p/neo_sft_phase2) - [LLaVA mixed (en&zh)](https://huggingface.co/datasets/BUAADreamer/llava-en-zh-300k) - [Open Assistant (de)](https://huggingface.co/datasets/mayflowergmbh/oasst_de) - [Dolly 15k (de)](https://huggingface.co/datasets/mayflowergmbh/dolly-15k_de) diff --git a/data/dataset_info.json b/data/dataset_info.json index 8c5cbb45..1d226b3a 100644 --- a/data/dataset_info.json +++ b/data/dataset_info.json @@ -248,6 +248,10 @@ "ruozhiba_gpt4": { "hf_hub_url": "hfl/ruozhiba_gpt4_turbo" }, + "neo_sft": { + "hf_hub_url": "m-a-p/neo_sft_phase2", + "formatting": "sharegpt" + }, "llava_1k_en": { "hf_hub_url": "BUAADreamer/llava-en-zh-2k", "subset": "en",