diff --git a/README.md b/README.md index 95e3e8a0..aad905d0 100644 --- a/README.md +++ b/README.md @@ -143,11 +143,11 @@ Compared to ChatGLM's [P-Tuning](https://github.com/THUDM/ChatGLM2-6B/tree/main/ | [InternLM2](https://huggingface.co/internlm) | 7B/20B | wqkv | intern2 | | [LLaMA](https://github.com/facebookresearch/llama) | 7B/13B/33B/65B | q_proj,v_proj | - | | [LLaMA-2](https://huggingface.co/meta-llama) | 7B/13B/70B | q_proj,v_proj | llama2 | -| [Mistral/Mixtral](https://huggingface.co/mistralai) | 7B/8x7B | q_proj,v_proj | mistral | +| [Mistral/Mixtral](https://huggingface.co/mistralai) | 7B/8x7B/8x22B | q_proj,v_proj | mistral | | [OLMo](https://huggingface.co/allenai) | 1B/7B | att_proj | olmo | | [Phi-1.5/2](https://huggingface.co/microsoft) | 1.3B/2.7B | q_proj,v_proj | - | | [Qwen](https://huggingface.co/Qwen) | 1.8B/7B/14B/72B | c_attn | qwen | -| [Qwen1.5 (MoE)](https://huggingface.co/Qwen) | 0.5B/1.8B/4B/7B/14B/32B/72B | q_proj,v_proj | qwen | +| [Qwen1.5 (Code/MoE)](https://huggingface.co/Qwen) | 0.5B/1.8B/4B/7B/14B/32B/72B | q_proj,v_proj | qwen | | [StarCoder2](https://huggingface.co/bigcode) | 3B/7B/15B | q_proj,v_proj | - | | [XVERSE](https://huggingface.co/xverse) | 7B/13B/65B | q_proj,v_proj | xverse | | [Yi](https://huggingface.co/01-ai) | 6B/9B/34B | q_proj,v_proj | yi | diff --git a/README_zh.md b/README_zh.md index d8b0c518..9ffc85e9 100644 --- a/README_zh.md +++ b/README_zh.md @@ -143,11 +143,11 @@ https://github.com/hiyouga/LLaMA-Factory/assets/16256802/ec36a9dd-37f4-4f72-81bd | [InternLM2](https://huggingface.co/internlm) | 7B/20B | wqkv | intern2 | | [LLaMA](https://github.com/facebookresearch/llama) | 7B/13B/33B/65B | q_proj,v_proj | - | | [LLaMA-2](https://huggingface.co/meta-llama) | 7B/13B/70B | q_proj,v_proj | llama2 | -| [Mistral/Mixtral](https://huggingface.co/mistralai) | 7B/8x7B | q_proj,v_proj | mistral | +| [Mistral/Mixtral](https://huggingface.co/mistralai) | 7B/8x7B/8x22B | q_proj,v_proj | mistral | | [OLMo](https://huggingface.co/allenai) | 1B/7B | att_proj | olmo | | [Phi-1.5/2](https://huggingface.co/microsoft) | 1.3B/2.7B | q_proj,v_proj | - | | [Qwen](https://huggingface.co/Qwen) | 1.8B/7B/14B/72B | c_attn | qwen | -| [Qwen1.5 (MoE)](https://huggingface.co/Qwen) | 0.5B/1.8B/4B/7B/14B/32B/72B | q_proj,v_proj | qwen | +| [Qwen1.5 (Code/MoE)](https://huggingface.co/Qwen) | 0.5B/1.8B/4B/7B/14B/32B/72B | q_proj,v_proj | qwen | | [StarCoder2](https://huggingface.co/bigcode) | 3B/7B/15B | q_proj,v_proj | - | | [XVERSE](https://huggingface.co/xverse) | 7B/13B/65B | q_proj,v_proj | xverse | | [Yi](https://huggingface.co/01-ai) | 6B/9B/34B | q_proj,v_proj | yi | diff --git a/src/llmtuner/data/template.py b/src/llmtuner/data/template.py index 1311eda5..286280e6 100644 --- a/src/llmtuner/data/template.py +++ b/src/llmtuner/data/template.py @@ -659,7 +659,7 @@ _register_template( _register_template( name="mistral", - format_user=StringFormatter(slots=["[INST] {{content}} [/INST]"]), + format_user=StringFormatter(slots=[" [INST] {{content}} [/INST]"]), format_system=StringFormatter(slots=[{"bos_token"}, "{{content}}"]), force_system=True, ) diff --git a/src/llmtuner/extras/constants.py b/src/llmtuner/extras/constants.py index e45d6ac6..cacb6c5f 100644 --- a/src/llmtuner/extras/constants.py +++ b/src/llmtuner/extras/constants.py @@ -538,14 +538,20 @@ register_model_group( register_model_group( models={ - "Mixtral-8x7B": { + "Mixtral-8x7B-v0.1": { DownloadSource.DEFAULT: "mistralai/Mixtral-8x7B-v0.1", DownloadSource.MODELSCOPE: "AI-ModelScope/Mixtral-8x7B-v0.1", }, - "Mixtral-8x7B-Chat": { + "Mixtral-8x7B-v0.1-Chat": { DownloadSource.DEFAULT: "mistralai/Mixtral-8x7B-Instruct-v0.1", DownloadSource.MODELSCOPE: "AI-ModelScope/Mixtral-8x7B-Instruct-v0.1", }, + "Mixtral-8x22B-v0.1": { + DownloadSource.DEFAULT: "mistralai/Mixtral-8x22B-v0.1", + }, + "Mixtral-8x22B-v0.1-Chat": { + DownloadSource.DEFAULT: "mistralai/Mixtral-8x22B-Instruct-v0.1", + }, }, template="mistral", )