update readme and constants

2023-11-15 18:04:37 +08:00 · 2023-11-15 18:04:37 +08:00 · 1e19cf242a
parent 4907452d95
commit 1e19cf242a
4 changed files with 39 additions and 89 deletions
--- a/README.md
+++ b/README.md
@ -71,7 +71,7 @@ https://github.com/hiyouga/LLaMA-Factory/assets/16256802/6ba60acc-e2e2-4bec-b846
 >
 > For the "base" models, the `--template` argument can be chosen from `default`, `alpaca`, `vicuna` etc. But make sure to use the **corresponding template** for the "chat" models.

-Please refer to [template.py](src/llmtuner/extras/template.py) for a full list of models we supported.
+Please refer to [constants.py](src/llmtuner/extras/constants.py) for a full list of the models we support.

 ## Supported Training Approaches

@ -122,6 +122,7 @@ Please refer to [template.py](src/llmtuner/extras/template.py) for a full list o
 - [OpenPlatypus (en)](https://huggingface.co/datasets/garage-bAInd/Open-Platypus)
 - [CodeAlpaca 20k (en)](https://huggingface.co/datasets/sahil2801/CodeAlpaca-20k)
 - [Alpaca CoT (multilingual)](https://huggingface.co/datasets/QingyiSi/Alpaca-CoT)
+- [OpenOrca (en)](https://huggingface.co/datasets/Open-Orca/OpenOrca)
 - [MathInstruct (en)](https://huggingface.co/datasets/TIGER-Lab/MathInstruct)
 - [Firefly 1.1M (zh)](https://huggingface.co/datasets/YeungNLP/firefly-train-1.1M)
 - [Web QA (zh)](https://huggingface.co/datasets/suolyer/webqa)
--- a/README_zh.md
+++ b/README_zh.md
@ -71,7 +71,7 @@ https://github.com/hiyouga/LLaMA-Factory/assets/16256802/6ba60acc-e2e2-4bec-b846
 >
 > 对于所有“基座”（Base）模型，`--template` 参数可以是 `default`, `alpaca`, `vicuna` 等任意值。但“对话”（Chat）模型请务必使用**对应的模板**。

-项目所支持模型的完整列表请参阅 [template.py](src/llmtuner/extras/template.py)。
+项目所支持模型的完整列表请参阅 [constants.py](src/llmtuner/extras/constants.py)。

 ## 训练方法

@ -122,6 +122,7 @@ https://github.com/hiyouga/LLaMA-Factory/assets/16256802/6ba60acc-e2e2-4bec-b846
 - [OpenPlatypus (en)](https://huggingface.co/datasets/garage-bAInd/Open-Platypus)
 - [CodeAlpaca 20k (en)](https://huggingface.co/datasets/sahil2801/CodeAlpaca-20k)
 - [Alpaca CoT (multilingual)](https://huggingface.co/datasets/QingyiSi/Alpaca-CoT)
+- [OpenOrca (en)](https://huggingface.co/datasets/Open-Orca/OpenOrca)
 - [MathInstruct (en)](https://huggingface.co/datasets/TIGER-Lab/MathInstruct)
 - [Firefly 1.1M (zh)](https://huggingface.co/datasets/YeungNLP/firefly-train-1.1M)
 - [Web QA (zh)](https://huggingface.co/datasets/suolyer/webqa)
--- a/src/llmtuner/data/template.py
+++ b/src/llmtuner/data/template.py
@ -225,9 +225,6 @@ def get_template_and_fix_tokenizer(
    return template


-r"""
-Supports: https://huggingface.co/tatsu-lab/alpaca-7b-wdiff
-"""
 register_template(
    name="alpaca",
    prefix=[
@ -246,11 +243,6 @@ register_template(
 )


-r"""
-Supports: https://huggingface.co/BAAI/AquilaChat-7B
-          https://huggingface.co/BAAI/AquilaChat2-7B
-          https://huggingface.co/BAAI/AquilaChat2-34B
-"""
 register_template(
    name="aquila",
    prefix=[
@ -273,9 +265,6 @@ register_template(
 )


-r"""
-Supports: https://huggingface.co/baichuan-inc/Baichuan-13B-Chat
-"""
 register_template(
    name="baichuan",
    prefix=[
@ -292,10 +281,6 @@ register_template(
 )


-r"""
-Supports: https://huggingface.co/baichuan-inc/Baichuan2-7B-Chat
-          https://huggingface.co/baichuan-inc/Baichuan2-13B-Chat
-"""
 register_template(
    name="baichuan2",
    prefix=[
@ -312,9 +297,6 @@ register_template(
 )


-r"""
-Supports: https://huggingface.co/BelleGroup/BELLE-LLaMA-EXT-13B
-"""
 register_template(
    name="belle",
    prefix=[
@ -330,9 +312,6 @@ register_template(
 )


-r"""
-Supports: https://huggingface.co/vivo-ai/BlueLM-7B-Chat
-"""
 register_template(
    name="bluelm",
    prefix=[
@ -348,9 +327,6 @@ register_template(
 )


-r"""
-Supports: https://huggingface.co/THUDM/chatglm2-6b
-"""
 register_template(
    name="chatglm2",
    prefix=[
@ -369,9 +345,6 @@ register_template(
 )


-r"""
-Supports: https://huggingface.co/THUDM/chatglm3-6b
-"""
 register_template(
    name="chatglm3",
    prefix=[
@ -395,11 +368,6 @@ register_template(
 )


-r"""
-Supports: https://huggingface.co/deepseek-ai/deepseek-coder-1.3b-instruct
-          https://huggingface.co/deepseek-ai/deepseek-coder-6.7b-instruct
-          https://huggingface.co/deepseek-ai/deepseek-coder-33b-instruct
-"""
 register_template(
    name="deepseek",
    prefix=[
@ -426,9 +394,6 @@ register_template(
 )


-r"""
-Default template.
-"""
 register_template(
    name="default",
    prefix=[
@ -447,9 +412,6 @@ register_template(
 )


-r"""
-Supports: https://huggingface.co/tiiuae/falcon-180B-chat
-"""
 register_template(
    name="falcon",
    prefix=[
@ -466,10 +428,6 @@ register_template(
 )


-r"""
-Supports: https://huggingface.co/internlm/internlm-chat-7b
-          https://huggingface.co/internlm/internlm-chat-20b
-"""
 register_template(
    name="intern",
    prefix=[
@ -492,11 +450,6 @@ register_template(
 )


-r"""
-Supports: https://huggingface.co/meta-llama/Llama-2-7b-chat-hf
-          https://huggingface.co/meta-llama/Llama-2-13b-chat-hf
-          https://huggingface.co/meta-llama/Llama-2-70b-chat-hf
-"""
 register_template(
    name="llama2",
    prefix=[
@ -519,10 +472,6 @@ register_template(
 )


-r"""
-Supports: https://huggingface.co/ziqingyang/chinese-alpaca-2-7b
-          https://huggingface.co/ziqingyang/chinese-alpaca-2-13b
-"""
 register_template(
    name="llama2_zh",
    prefix=[
@ -536,9 +485,6 @@ register_template(
 )


-r"""
-Supports: https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1
-"""
 register_template(
    name="mistral",
    prefix=[
@ -552,9 +498,6 @@ register_template(
 )


-r"""
-Supports: https://huggingface.co/openchat/openchat_3.5
-"""
 register_template(
    name="openchat",
    prefix=[
@ -576,10 +519,6 @@ register_template(
 )


-r"""
-Supports: https://huggingface.co/Qwen/Qwen-7B-Chat
-          https://huggingface.co/Qwen/Qwen-14B-Chat
-"""
 register_template(
    name="qwen",
    prefix=[
@ -606,10 +545,6 @@ register_template(
 )


-r"""
-Supports: https://huggingface.co/HuggingFaceH4/starchat-alpha
-          https://huggingface.co/HuggingFaceH4/starchat-beta
-"""
 register_template(
    name="starchat",
    prefix=[
@ -650,10 +585,6 @@ register_template(
 )


-r"""
-Supports: https://huggingface.co/lmsys/vicuna-7b-v1.5
-          https://huggingface.co/lmsys/vicuna-13b-v1.5
-"""
 register_template(
    name="vicuna",
    prefix=[
@ -670,10 +601,6 @@ register_template(
 )


-r"""
-Supports: https://huggingface.co/xverse/XVERSE-7B-Chat
-          https://huggingface.co/xverse/XVERSE-13B-Chat
-"""
 register_template(
    name="xverse",
    prefix=[
@ -687,11 +614,6 @@ register_template(
 )


-r"""
-Supports: https://huggingface.co/wenge-research/yayi-7b
-          https://huggingface.co/wenge-research/yayi-7b-llama2
-          https://huggingface.co/wenge-research/yayi-13b-llama2
-"""
 register_template(
    name="yayi",
    prefix=[
@ -724,10 +646,6 @@ register_template(
 )


-r"""
-Supports: https://huggingface.co/HuggingFaceH4/zephyr-7b-alpha
-          https://huggingface.co/HuggingFaceH4/zephyr-7b-beta
-"""
 register_template(
    name="zephyr",
    prefix=[
@ -746,11 +664,6 @@ register_template(
 )


-r"""
-Supports: https://huggingface.co/IDEA-CCNL/Ziya-LLaMA-13B-v1
-          https://huggingface.co/IDEA-CCNL/Ziya-LLaMA-13B-v1.1
-          https://huggingface.co/IDEA-CCNL/Ziya2-13B-Chat
-"""
 register_template(
    name="ziya",
    prefix=[
--- a/src/llmtuner/extras/constants.py
+++ b/src/llmtuner/extras/constants.py
@ -194,6 +194,14 @@ register_model_group(
 )


+register_model_group(
+    models={
+        "OpenChat3.5-7B-Chat": "openchat/openchat_3.5"
+    },
+    template="openchat"
+)
+
+
 register_model_group(
    models={
        "Phi1.5-1.3B": "microsoft/phi-1_5"
@ -221,6 +229,15 @@ register_model_group(
 )


+register_model_group(
+    models={
+        "Vicuna1.5-7B-Chat": "lmsys/vicuna-7b-v1.5",
+        "Vicuna1.5-13B-Chat": "lmsys/vicuna-13b-v1.5"
+    },
+    template="vicuna"
+)
+
+
 register_model_group(
    models={
        "XVERSE-7B": "xverse/XVERSE-7B",
@ -233,9 +250,27 @@ register_model_group(
 )


+register_model_group(
+    models={
+        "Yayi-7B": "wenge-research/yayi-7b-llama2",
+        "Yayi-13B": "wenge-research/yayi-13b-llama2"
+    },
+    template="yayi"
+)
+
+
 register_model_group(
    models={
        "Yi-6B": "01-ai/Yi-6B",
        "Yi-34B": "01-ai/Yi-34B"
    }
 )
+
+
+register_model_group(
+    models={
+        "Zephyr-7B-Alpha-Chat": "HuggingFaceH4/zephyr-7b-alpha",
+        "Zephyr-7B-Beta-Chat": "HuggingFaceH4/zephyr-7b-beta"
+    },
+    template="zephyr"
+)