diff --git a/README.md b/README.md index ca9e7e1d..2d95583b 100644 --- a/README.md +++ b/README.md @@ -270,6 +270,8 @@ You also can add a custom chat template to [template.py](src/llamafactory/data/t - [Booksum (de)](https://huggingface.co/datasets/mayflowergmbh/booksum_de) - [Airoboros (de)](https://huggingface.co/datasets/mayflowergmbh/airoboros-3.0_de) - [Ultrachat (de)](https://huggingface.co/datasets/mayflowergmbh/ultra-chat_de) +- [WebInstructSub (en)](https://huggingface.co/datasets/TIGER-Lab/WebInstructSub) +- [Magpie-Pro-300K-Filtered (en)](https://huggingface.co/datasets/Magpie-Align/Magpie-Pro-300K-Filtered) diff --git a/README_zh.md b/README_zh.md index 2ede76ba..ed5475d3 100644 --- a/README_zh.md +++ b/README_zh.md @@ -270,6 +270,8 @@ https://github.com/hiyouga/LLaMA-Factory/assets/16256802/ec36a9dd-37f4-4f72-81bd - [Booksum (de)](https://huggingface.co/datasets/mayflowergmbh/booksum_de) - [Airoboros (de)](https://huggingface.co/datasets/mayflowergmbh/airoboros-3.0_de) - [Ultrachat (de)](https://huggingface.co/datasets/mayflowergmbh/ultra-chat_de) +- [WebInstructSub (en)](https://huggingface.co/datasets/TIGER-Lab/WebInstructSub) +- [Magpie-Pro-300K-Filtered (en)](https://huggingface.co/datasets/Magpie-Align/Magpie-Pro-300K-Filtered) diff --git a/data/dataset_info.json b/data/dataset_info.json index 1d226b3a..e321196a 100644 --- a/data/dataset_info.json +++ b/data/dataset_info.json @@ -524,13 +524,13 @@ "prompt": "text" } }, - "fileweb": { + "fineweb": { "hf_hub_url": "HuggingFaceFW/fineweb", "columns": { "prompt": "text" } }, - "fileweb_edu": { + "fineweb_edu": { "hf_hub_url": "HuggingFaceFW/fineweb-edu", "columns": { "prompt": "text" @@ -550,5 +550,25 @@ "prompt": "content" }, "folder": "python" + }, + "Magpie-Pro-300K-Filtered": { + "hf_hub_url": "Magpie-Align/Magpie-Pro-300K-Filtered", + "columns": { + "messages": "conversations" + }, + "tags": { + "role_tag": "from", + "content_tag": "value", + "user_tag": "human", + "assistant_tag": "gpt" + }, + "formatting": "sharegpt" + }, + "WebInstructSub": { + "hf_hub_url": "TIGER-Lab/WebInstructSub", + "columns": { + "prompt": "question", + "response": "answer" + } } -} \ No newline at end of file +}