add models

2023-11-30 19:16:13 +08:00 · 2023-11-30 19:16:13 +08:00 · 509abe8864
parent 9d38e5687d
commit 509abe8864
4 changed files with 53 additions and 10 deletions
--- a/README.md
+++ b/README.md
@@ -92,7 +92,7 @@ Compared to ChatGLM's [P-Tuning](https://github.com/THUDM/ChatGLM2-6B/tree/main/
 | [LLaMA-2](https://huggingface.co/meta-llama)             | 7B/13B/70B                  | q_proj,v_proj     | llama2    |
 | [Mistral](https://huggingface.co/mistralai)              | 7B                          | q_proj,v_proj     | mistral   |
 | [Phi-1.5](https://huggingface.co/microsoft/phi-1_5)      | 1.3B                        | Wqkv              | -         |
-| [Qwen](https://github.com/QwenLM/Qwen)                   | 7B/14B                      | c_attn            | qwen      |
+| [Qwen](https://github.com/QwenLM/Qwen)                   | 1.8B/7B/14B/72B             | c_attn            | qwen      |
 | [XVERSE](https://github.com/xverse-ai)                   | 7B/13B/65B                  | q_proj,v_proj     | xverse    |

 > [!NOTE]
@@ -199,8 +199,8 @@ huggingface-cli login
 | Full   |  16  | 140GB | 240GB | 520GB | 1200GB |
 | Freeze |  16  |  20GB |  40GB | 120GB |  240GB |
 | LoRA   |  16  |  16GB |  32GB |  80GB |  160GB |
-| LoRA   |   8  |  10GB |  16GB |  40GB |   80GB |
-| LoRA   |   4  |   6GB |  12GB |  24GB |   48GB |
+| QLoRA  |   8  |  10GB |  16GB |  40GB |   80GB |
+| QLoRA  |   4  |   6GB |  12GB |  24GB |   48GB |

 ## Getting Started

--- a/README_zh.md
+++ b/README_zh.md
@@ -92,7 +92,7 @@ https://github.com/hiyouga/LLaMA-Factory/assets/16256802/6ba60acc-e2e2-4bec-b846
 | [LLaMA-2](https://huggingface.co/meta-llama)             | 7B/13B/70B                  | q_proj,v_proj     | llama2    |
 | [Mistral](https://huggingface.co/mistralai)              | 7B                          | q_proj,v_proj     | mistral   |
 | [Phi-1.5](https://huggingface.co/microsoft/phi-1_5)      | 1.3B                        | Wqkv              | -         |
-| [Qwen](https://github.com/QwenLM/Qwen)                   | 7B/14B                      | c_attn            | qwen      |
+| [Qwen](https://github.com/QwenLM/Qwen)                   | 1.8B/7B/14B/72B             | c_attn            | qwen      |
 | [XVERSE](https://github.com/xverse-ai)                   | 7B/13B/65B                  | q_proj,v_proj     | xverse    |

 > [!NOTE]
@@ -199,8 +199,8 @@ huggingface-cli login
 | 全参数   |  16  | 140GB | 240GB | 520GB | 1200GB |
 | 部分参数 |  16  |  20GB |  40GB | 120GB |  240GB |
 | LoRA    |  16  |  16GB |  32GB |  80GB |  160GB |
-| LoRA    |   8  |  10GB |  16GB |  40GB |   80GB |
-| LoRA    |   4  |   6GB |  12GB |  24GB |   48GB |
+| QLoRA   |   8  |  10GB |  16GB |  40GB |   80GB |
+| QLoRA   |   4  |   6GB |  12GB |  24GB |   48GB |

 ## 如何使用

--- a/src/llmtuner/data/template.py
+++ b/src/llmtuner/data/template.py
@@ -408,18 +408,31 @@ register_template(
        "{{system}}"
    ],
    prompt=[
-        "### Instruction:\n{{query}}\n\n### Response:\n"
+        "User: {{query}}\n\nAssistant:"
+    ],
+    system="",
+    sep=[]
+)
+
+
+register_template(
+    name="deepseekcoder",
+    prefix=[
+        "{{system}}"
+    ],
+    prompt=[
+        "### Instruction:\n{{query}}\n### Response:\n"
    ],
    system=(
        "You are an AI programming assistant, utilizing the Deepseek Coder model, "
        "developed by Deepseek Company, and you only answer questions related to computer science. "
        "For politically sensitive questions, security and privacy issues, "
-        "and other non-computer science questions, you will refuse to answer."
+        "and other non-computer science questions, you will refuse to answer\n"
    ),
    sep=[
        "\n",
        {"token": "<|EOT|>"},
-        "\n\n"
+        "\n"
    ],
    stop_words=[
        "<|EOT|>"
--- a/src/llmtuner/extras/constants.py
+++ b/src/llmtuner/extras/constants.py
@@ -131,6 +131,28 @@ register_model_group(
 )


+register_model_group(
+    models={
+        "DeepseekLLM-7B-Base": "deepseek-ai/deepseek-llm-7b-base",
+        "DeepseekLLM-67B-Base": "deepseek-ai/deepseek-llm-67b-base",
+        "DeepseekLLM-7B-Chat": "deepseek-ai/deepseek-llm-7b-chat",
+        "DeepseekLLM-67B-Chat": "deepseek-ai/deepseek-llm-67b-chat"
+    },
+    template="deepseek"
+)
+
+
+register_model_group(
+    models={
+        "DeepseekCoder-6.7B-Base": "deepseek-ai/deepseek-coder-6.7b-base",
+        "DeepseekCoder-6.7B-Chat": "deepseek-ai/deepseek-coder-6.7b-instruct",
+        "DeepseekCoder-33B-Base": "deepseek-ai/deepseek-coder-33b-base",
+        "DeepseekCoder-33B-Chat": "deepseek-ai/deepseek-coder-33b-instruct"
+    },
+    template="deepseekcoder"
+)
+
+
 register_model_group(
    models={
        "Falcon-7B": "tiiuae/falcon-7b",
@@ -214,14 +236,22 @@ register_model_group(

 register_model_group(
    models={
+        "Qwen-1.8B": "Qwen/Qwen-1_8B",
        "Qwen-7B": "Qwen/Qwen-7B",
        "Qwen-14B": "Qwen/Qwen-14B",
+        "Qwen-72B": "Qwen/Qwen-72B",
+        "Qwen-1.8B-Chat": "Qwen/Qwen-1_8B-Chat",
        "Qwen-7B-Chat": "Qwen/Qwen-7B-Chat",
        "Qwen-14B-Chat": "Qwen/Qwen-14B-Chat",
+        "Qwen-72B-Chat": "Qwen/Qwen-72B-Chat",
+        "Qwen-1.8B-int8-Chat": "Qwen/Qwen-1_8B-Chat-Int8",
+        "Qwen-1.8B-int4-Chat": "Qwen/Qwen-1_8B-Chat-Int4",
        "Qwen-7B-int8-Chat": "Qwen/Qwen-7B-Chat-Int8",
        "Qwen-7B-int4-Chat": "Qwen/Qwen-7B-Chat-Int4",
        "Qwen-14B-int8-Chat": "Qwen/Qwen-14B-Chat-Int8",
-        "Qwen-14B-int4-Chat": "Qwen/Qwen-14B-Chat-Int4"
+        "Qwen-14B-int4-Chat": "Qwen/Qwen-14B-Chat-Int4",
+        "Qwen-72B-int8-Chat": "Qwen/Qwen-72B-Chat-Int8",
+        "Qwen-72B-int4-Chat": "Qwen/Qwen-72B-Chat-Int4"
    },
    module="c_attn",
    template="qwen"