From 1e19cf242a1f843b590feefbe24b2cc0a17712b5 Mon Sep 17 00:00:00 2001 From: hiyouga Date: Wed, 15 Nov 2023 18:04:37 +0800 Subject: [PATCH] update readme and constants --- README.md | 3 +- README_zh.md | 3 +- src/llmtuner/data/template.py | 87 -------------------------------- src/llmtuner/extras/constants.py | 35 +++++++++++++ 4 files changed, 39 insertions(+), 89 deletions(-) diff --git a/README.md b/README.md index f9ea4303..9a2499ae 100644 --- a/README.md +++ b/README.md @@ -71,7 +71,7 @@ https://github.com/hiyouga/LLaMA-Factory/assets/16256802/6ba60acc-e2e2-4bec-b846 > > For the "base" models, the `--template` argument can be chosen from `default`, `alpaca`, `vicuna` etc. But make sure to use the **corresponding template** for the "chat" models. -Please refer to [template.py](src/llmtuner/extras/template.py) for a full list of models we supported. +Please refer to [constants.py](src/llmtuner/extras/constants.py) for a full list of models we supported. ## Supported Training Approaches @@ -122,6 +122,7 @@ Please refer to [template.py](src/llmtuner/extras/template.py) for a full list o - [OpenPlatypus (en)](https://huggingface.co/datasets/garage-bAInd/Open-Platypus) - [CodeAlpaca 20k (en)](https://huggingface.co/datasets/sahil2801/CodeAlpaca-20k) - [Alpaca CoT (multilingual)](https://huggingface.co/datasets/QingyiSi/Alpaca-CoT) +- [OpenOrca (en)](https://huggingface.co/datasets/Open-Orca/OpenOrca) - [MathInstruct (en)](https://huggingface.co/datasets/TIGER-Lab/MathInstruct) - [Firefly 1.1M (zh)](https://huggingface.co/datasets/YeungNLP/firefly-train-1.1M) - [Web QA (zh)](https://huggingface.co/datasets/suolyer/webqa) diff --git a/README_zh.md b/README_zh.md index 74b9362a..eb0f5c6e 100644 --- a/README_zh.md +++ b/README_zh.md @@ -71,7 +71,7 @@ https://github.com/hiyouga/LLaMA-Factory/assets/16256802/6ba60acc-e2e2-4bec-b846 > > 对于所有“基座”(Base)模型,`--template` 参数可以是 `default`, `alpaca`, `vicuna` 等任意值。但“对话”(Chat)模型请务必使用**对应的模板**。 -项目所支持模型的完整列表请参阅 [template.py](src/llmtuner/extras/template.py)。 +项目所支持模型的完整列表请参阅 [constants.py](src/llmtuner/extras/constants.py)。 ## 训练方法 @@ -122,6 +122,7 @@ https://github.com/hiyouga/LLaMA-Factory/assets/16256802/6ba60acc-e2e2-4bec-b846 - [OpenPlatypus (en)](https://huggingface.co/datasets/garage-bAInd/Open-Platypus) - [CodeAlpaca 20k (en)](https://huggingface.co/datasets/sahil2801/CodeAlpaca-20k) - [Alpaca CoT (multilingual)](https://huggingface.co/datasets/QingyiSi/Alpaca-CoT) +- [OpenOrca (en)](https://huggingface.co/datasets/Open-Orca/OpenOrca) - [MathInstruct (en)](https://huggingface.co/datasets/TIGER-Lab/MathInstruct) - [Firefly 1.1M (zh)](https://huggingface.co/datasets/YeungNLP/firefly-train-1.1M) - [Web QA (zh)](https://huggingface.co/datasets/suolyer/webqa) diff --git a/src/llmtuner/data/template.py b/src/llmtuner/data/template.py index bcb9ffa0..03b3c011 100644 --- a/src/llmtuner/data/template.py +++ b/src/llmtuner/data/template.py @@ -225,9 +225,6 @@ def get_template_and_fix_tokenizer( return template -r""" -Supports: https://huggingface.co/tatsu-lab/alpaca-7b-wdiff -""" register_template( name="alpaca", prefix=[ @@ -246,11 +243,6 @@ register_template( ) -r""" -Supports: https://huggingface.co/BAAI/AquilaChat-7B - https://huggingface.co/BAAI/AquilaChat2-7B - https://huggingface.co/BAAI/AquilaChat2-34B -""" register_template( name="aquila", prefix=[ @@ -273,9 +265,6 @@ register_template( ) -r""" -Supports: https://huggingface.co/baichuan-inc/Baichuan-13B-Chat -""" register_template( name="baichuan", prefix=[ @@ -292,10 +281,6 @@ register_template( ) -r""" -Supports: https://huggingface.co/baichuan-inc/Baichuan2-7B-Chat - https://huggingface.co/baichuan-inc/Baichuan2-13B-Chat -""" register_template( name="baichuan2", prefix=[ @@ -312,9 +297,6 @@ register_template( ) -r""" -Supports: https://huggingface.co/BelleGroup/BELLE-LLaMA-EXT-13B -""" register_template( name="belle", prefix=[ @@ -330,9 +312,6 @@ register_template( ) -r""" -Supports: https://huggingface.co/vivo-ai/BlueLM-7B-Chat -""" register_template( name="bluelm", prefix=[ @@ -348,9 +327,6 @@ register_template( ) -r""" -Supports: https://huggingface.co/THUDM/chatglm2-6b -""" register_template( name="chatglm2", prefix=[ @@ -369,9 +345,6 @@ register_template( ) -r""" -Supports: https://huggingface.co/THUDM/chatglm3-6b -""" register_template( name="chatglm3", prefix=[ @@ -395,11 +368,6 @@ register_template( ) -r""" -Supports: https://huggingface.co/deepseek-ai/deepseek-coder-1.3b-instruct - https://huggingface.co/deepseek-ai/deepseek-coder-6.7b-instruct - https://huggingface.co/deepseek-ai/deepseek-coder-33b-instruct -""" register_template( name="deepseek", prefix=[ @@ -426,9 +394,6 @@ register_template( ) -r""" -Default template. -""" register_template( name="default", prefix=[ @@ -447,9 +412,6 @@ register_template( ) -r""" -Supports: https://huggingface.co/tiiuae/falcon-180B-chat -""" register_template( name="falcon", prefix=[ @@ -466,10 +428,6 @@ register_template( ) -r""" -Supports: https://huggingface.co/internlm/internlm-chat-7b - https://huggingface.co/internlm/internlm-chat-20b -""" register_template( name="intern", prefix=[ @@ -492,11 +450,6 @@ register_template( ) -r""" -Supports: https://huggingface.co/meta-llama/Llama-2-7b-chat-hf - https://huggingface.co/meta-llama/Llama-2-13b-chat-hf - https://huggingface.co/meta-llama/Llama-2-70b-chat-hf -""" register_template( name="llama2", prefix=[ @@ -519,10 +472,6 @@ register_template( ) -r""" -Supports: https://huggingface.co/ziqingyang/chinese-alpaca-2-7b - https://huggingface.co/ziqingyang/chinese-alpaca-2-13b -""" register_template( name="llama2_zh", prefix=[ @@ -536,9 +485,6 @@ register_template( ) -r""" -Supports: https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1 -""" register_template( name="mistral", prefix=[ @@ -552,9 +498,6 @@ register_template( ) -r""" -Supports: https://huggingface.co/openchat/openchat_3.5 -""" register_template( name="openchat", prefix=[ @@ -576,10 +519,6 @@ register_template( ) -r""" -Supports: https://huggingface.co/Qwen/Qwen-7B-Chat - https://huggingface.co/Qwen/Qwen-14B-Chat -""" register_template( name="qwen", prefix=[ @@ -606,10 +545,6 @@ register_template( ) -r""" -Supports: https://huggingface.co/HuggingFaceH4/starchat-alpha - https://huggingface.co/HuggingFaceH4/starchat-beta -""" register_template( name="starchat", prefix=[ @@ -650,10 +585,6 @@ register_template( ) -r""" -Supports: https://huggingface.co/lmsys/vicuna-7b-v1.5 - https://huggingface.co/lmsys/vicuna-13b-v1.5 -""" register_template( name="vicuna", prefix=[ @@ -670,10 +601,6 @@ register_template( ) -r""" -Supports: https://huggingface.co/xverse/XVERSE-7B-Chat - https://huggingface.co/xverse/XVERSE-13B-Chat -""" register_template( name="xverse", prefix=[ @@ -687,11 +614,6 @@ register_template( ) -r""" -Supports: https://huggingface.co/wenge-research/yayi-7b - https://huggingface.co/wenge-research/yayi-7b-llama2 - https://huggingface.co/wenge-research/yayi-13b-llama2 -""" register_template( name="yayi", prefix=[ @@ -724,10 +646,6 @@ register_template( ) -r""" -Supports: https://huggingface.co/HuggingFaceH4/zephyr-7b-alpha - https://huggingface.co/HuggingFaceH4/zephyr-7b-beta -""" register_template( name="zephyr", prefix=[ @@ -746,11 +664,6 @@ register_template( ) -r""" -Supports: https://huggingface.co/IDEA-CCNL/Ziya-LLaMA-13B-v1 - https://huggingface.co/IDEA-CCNL/Ziya-LLaMA-13B-v1.1 - https://huggingface.co/IDEA-CCNL/Ziya2-13B-Chat -""" register_template( name="ziya", prefix=[ diff --git a/src/llmtuner/extras/constants.py b/src/llmtuner/extras/constants.py index 861d4a99..64d36745 100644 --- a/src/llmtuner/extras/constants.py +++ b/src/llmtuner/extras/constants.py @@ -194,6 +194,14 @@ register_model_group( ) +register_model_group( + models={ + "OpenChat3.5-7B-Chat": "openchat/openchat_3.5" + }, + template="openchat" +) + + register_model_group( models={ "Phi1.5-1.3B": "microsoft/phi-1_5" @@ -221,6 +229,15 @@ register_model_group( ) +register_model_group( + models={ + "Vicuna1.5-7B-Chat": "lmsys/vicuna-7b-v1.5", + "Vicuna1.5-13B-Chat": "lmsys/vicuna-13b-v1.5" + }, + template="vicuna" +) + + register_model_group( models={ "XVERSE-7B": "xverse/XVERSE-7B", @@ -233,9 +250,27 @@ register_model_group( ) +register_model_group( + models={ + "Yayi-7B": "wenge-research/yayi-7b-llama2", + "Yayi-13B": "wenge-research/yayi-13b-llama2" + }, + template="yayi" +) + + register_model_group( models={ "Yi-6B": "01-ai/Yi-6B", "Yi-34B": "01-ai/Yi-34B" } ) + + +register_model_group( + models={ + "Zephyr-7B-Alpha-Chat": "HuggingFaceH4/zephyr-7b-alpha", + "Zephyr-7B-Beta-Chat": "HuggingFaceH4/zephyr-7b-beta" + }, + template="zephyr" +)