From 344b9a36b2e0b60ee61fba171b35a391e3517fed Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Tue, 18 Jun 2024 23:32:18 +0800 Subject: [PATCH] tiny fix --- Dockerfile | 1 + README.md | 4 ++-- README_zh.md | 4 ++-- data/dataset_info.json | 33 ++++++++++++--------------------- 4 files changed, 17 insertions(+), 25 deletions(-) diff --git a/Dockerfile b/Dockerfile index 9c2f645c..61d58005 100644 --- a/Dockerfile +++ b/Dockerfile @@ -43,4 +43,5 @@ EXPOSE 7860 # Expose port 8000 for the API service EXPOSE 8000 +# Launch LLaMA Board CMD [ "llamafactory-cli", "webui" ] diff --git a/README.md b/README.md index 2d95583b..0e8e55f7 100644 --- a/README.md +++ b/README.md @@ -260,6 +260,8 @@ You also can add a custom chat template to [template.py](src/llamafactory/data/t - [STEM (zh)](https://huggingface.co/datasets/hfl/stem_zh_instruction) - [Ruozhiba (zh)](https://huggingface.co/datasets/hfl/ruozhiba_gpt4_turbo) - [Neo-sft (zh)](https://huggingface.co/datasets/m-a-p/neo_sft_phase2) +- [WebInstructSub (en)](https://huggingface.co/datasets/TIGER-Lab/WebInstructSub) +- [Magpie-Pro-300K-Filtered (en)](https://huggingface.co/datasets/Magpie-Align/Magpie-Pro-300K-Filtered) - [LLaVA mixed (en&zh)](https://huggingface.co/datasets/BUAADreamer/llava-en-zh-300k) - [Open Assistant (de)](https://huggingface.co/datasets/mayflowergmbh/oasst_de) - [Dolly 15k (de)](https://huggingface.co/datasets/mayflowergmbh/dolly-15k_de) @@ -270,8 +272,6 @@ You also can add a custom chat template to [template.py](src/llamafactory/data/t - [Booksum (de)](https://huggingface.co/datasets/mayflowergmbh/booksum_de) - [Airoboros (de)](https://huggingface.co/datasets/mayflowergmbh/airoboros-3.0_de) - [Ultrachat (de)](https://huggingface.co/datasets/mayflowergmbh/ultra-chat_de) -- [WebInstructSub (en)](https://huggingface.co/datasets/TIGER-Lab/WebInstructSub) -- [Magpie-Pro-300K-Filtered (en)](https://huggingface.co/datasets/Magpie-Align/Magpie-Pro-300K-Filtered) diff --git a/README_zh.md b/README_zh.md index ed5475d3..152cd6fa 100644 --- a/README_zh.md +++ b/README_zh.md @@ -260,6 +260,8 @@ https://github.com/hiyouga/LLaMA-Factory/assets/16256802/ec36a9dd-37f4-4f72-81bd - [STEM (zh)](https://huggingface.co/datasets/hfl/stem_zh_instruction) - [Ruozhiba (zh)](https://huggingface.co/datasets/hfl/ruozhiba_gpt4_turbo) - [Neo-sft (zh)](https://huggingface.co/datasets/m-a-p/neo_sft_phase2) +- [WebInstructSub (en)](https://huggingface.co/datasets/TIGER-Lab/WebInstructSub) +- [Magpie-Pro-300K-Filtered (en)](https://huggingface.co/datasets/Magpie-Align/Magpie-Pro-300K-Filtered) - [LLaVA mixed (en&zh)](https://huggingface.co/datasets/BUAADreamer/llava-en-zh-300k) - [Open Assistant (de)](https://huggingface.co/datasets/mayflowergmbh/oasst_de) - [Dolly 15k (de)](https://huggingface.co/datasets/mayflowergmbh/dolly-15k_de) @@ -270,8 +272,6 @@ https://github.com/hiyouga/LLaMA-Factory/assets/16256802/ec36a9dd-37f4-4f72-81bd - [Booksum (de)](https://huggingface.co/datasets/mayflowergmbh/booksum_de) - [Airoboros (de)](https://huggingface.co/datasets/mayflowergmbh/airoboros-3.0_de) - [Ultrachat (de)](https://huggingface.co/datasets/mayflowergmbh/ultra-chat_de) -- [WebInstructSub (en)](https://huggingface.co/datasets/TIGER-Lab/WebInstructSub) -- [Magpie-Pro-300K-Filtered (en)](https://huggingface.co/datasets/Magpie-Align/Magpie-Pro-300K-Filtered) diff --git a/data/dataset_info.json b/data/dataset_info.json index e321196a..f8ffd407 100644 --- a/data/dataset_info.json +++ b/data/dataset_info.json @@ -252,6 +252,17 @@ "hf_hub_url": "m-a-p/neo_sft_phase2", "formatting": "sharegpt" }, + "magpie_pro_300k": { + "hf_hub_url": "Magpie-Align/Magpie-Pro-300K-Filtered", + "formatting": "sharegpt" + }, + "web_instruct": { + "hf_hub_url": "TIGER-Lab/WebInstructSub", + "columns": { + "prompt": "question", + "response": "answer" + } + }, "llava_1k_en": { "hf_hub_url": "BUAADreamer/llava-en-zh-2k", "subset": "en", @@ -550,25 +561,5 @@ "prompt": "content" }, "folder": "python" - }, - "Magpie-Pro-300K-Filtered": { - "hf_hub_url": "Magpie-Align/Magpie-Pro-300K-Filtered", - "columns": { - "messages": "conversations" - }, - "tags": { - "role_tag": "from", - "content_tag": "value", - "user_tag": "human", - "assistant_tag": "gpt" - }, - "formatting": "sharegpt" - }, - "WebInstructSub": { - "hf_hub_url": "TIGER-Lab/WebInstructSub", - "columns": { - "prompt": "question", - "response": "answer" - } } -} +} \ No newline at end of file