From 8e95648850fdd5075724359ffdb22beb48b75952 Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Fri, 7 Jun 2024 00:22:57 +0800 Subject: [PATCH] add qwen2 models --- README.md | 7 ++- README_zh.md | 7 ++- src/llamafactory/extras/constants.py | 83 ++++++++++++++++++++++++++++ 3 files changed, 93 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 77684757..1358b6e8 100644 --- a/README.md +++ b/README.md @@ -71,14 +71,16 @@ Compared to ChatGLM's [P-Tuning](https://github.com/THUDM/ChatGLM2-6B/tree/main/ ## Changelog +[24/06/07] We supported fine-tuning the **[Qwen-2](https://qwenlm.github.io/blog/qwen2/)** series models. + [24/06/05] We supported fine-tuning the **[GLM-4-9B/GLM-4-9B-Chat](https://github.com/THUDM/GLM-4)** models. [24/05/26] We supported **[SimPO](https://arxiv.org/abs/2405.14734)** algorithm for preference learning. See [examples](examples/README.md) for usage. -[24/05/20] We supported fine-tuning the **PaliGemma** series models. Note that the PaliGemma models are pre-trained models, you need to fine-tune them with `gemma` template for chat completion. -
Full Changelog +[24/05/20] We supported fine-tuning the **PaliGemma** series models. Note that the PaliGemma models are pre-trained models, you need to fine-tune them with `gemma` template for chat completion. + [24/05/18] We supported **[KTO](https://arxiv.org/abs/2402.01306)** algorithm for preference learning. See [examples](examples/README.md) for usage. [24/05/14] We supported training and inference on the Ascend NPU devices. Check [installation](#installation) section for details. @@ -172,6 +174,7 @@ Compared to ChatGLM's [P-Tuning](https://github.com/THUDM/ChatGLM2-6B/tree/main/ | [Phi-3](https://huggingface.co/microsoft) | 4B/7B/14B | phi | | [Qwen](https://huggingface.co/Qwen) | 1.8B/7B/14B/72B | qwen | | [Qwen1.5 (Code/MoE)](https://huggingface.co/Qwen) | 0.5B/1.8B/4B/7B/14B/32B/72B/110B | qwen | +| [Qwen2 (MoE)](https://huggingface.co/Qwen) | 0.5B/1.5B/7B/57B/72B | qwen | | [StarCoder2](https://huggingface.co/bigcode) | 3B/7B/15B | - | | [XVERSE](https://huggingface.co/xverse) | 7B/13B/65B | xverse | | [Yi (1/1.5)](https://huggingface.co/01-ai) | 6B/9B/34B | yi | diff --git a/README_zh.md b/README_zh.md index da5ff079..becec988 100644 --- a/README_zh.md +++ b/README_zh.md @@ -71,14 +71,16 @@ https://github.com/hiyouga/LLaMA-Factory/assets/16256802/ec36a9dd-37f4-4f72-81bd ## 更新日志 +[24/06/07] 我们支持了 **[Qwen-2](https://qwenlm.github.io/blog/qwen2/)** 系列模型的微调。 + [24/06/05] 我们支持了 **[GLM-4-9B/GLM-4-9B-Chat](https://github.com/THUDM/GLM-4)** 模型的微调。 [24/05/26] 我们支持了 **[SimPO](https://arxiv.org/abs/2405.14734)** 偏好对齐算法。详细用法请参照 [examples](examples/README_zh.md)。 -[24/05/20] 我们支持了 **PaliGemma** 系列模型的微调。注意 PaliGemma 是预训练模型,你需要使用 `gemma` 模板进行微调使其获得对话能力。 -
展开日志 +[24/05/20] 我们支持了 **PaliGemma** 系列模型的微调。注意 PaliGemma 是预训练模型,你需要使用 `gemma` 模板进行微调使其获得对话能力。 + [24/05/18] 我们支持了 **[KTO](https://arxiv.org/abs/2402.01306)** 偏好对齐算法。详细用法请参照 [examples](examples/README_zh.md)。 [24/05/14] 我们支持了昇腾 NPU 设备的训练和推理。详情请查阅[安装](#安装-llama-factory)部分。 @@ -172,6 +174,7 @@ https://github.com/hiyouga/LLaMA-Factory/assets/16256802/ec36a9dd-37f4-4f72-81bd | [Phi-3](https://huggingface.co/microsoft) | 4B/7B/14B | phi | | [Qwen](https://huggingface.co/Qwen) | 1.8B/7B/14B/72B | qwen | | [Qwen1.5 (Code/MoE)](https://huggingface.co/Qwen) | 0.5B/1.8B/4B/7B/14B/32B/72B/110B | qwen | +| [Qwen2 (MoE)](https://huggingface.co/Qwen) | 0.5B/1.5B/7B/57B/72B | qwen | | [StarCoder2](https://huggingface.co/bigcode) | 3B/7B/15B | - | | [XVERSE](https://huggingface.co/xverse) | 7B/13B/65B | xverse | | [Yi (1/1.5)](https://huggingface.co/01-ai) | 6B/9B/34B | yi | diff --git a/src/llamafactory/extras/constants.py b/src/llamafactory/extras/constants.py index 4d9cb26d..4099fe56 100644 --- a/src/llamafactory/extras/constants.py +++ b/src/llamafactory/extras/constants.py @@ -1078,6 +1078,89 @@ register_model_group( ) +register_model_group( + models={ + "Qwen2-0.5B": { + DownloadSource.DEFAULT: "Qwen/Qwen2-0.5B", + DownloadSource.MODELSCOPE: "qwen/Qwen2-0.5B", + }, + "Qwen2-1.5B": { + DownloadSource.DEFAULT: "Qwen/Qwen2-1.5B", + DownloadSource.MODELSCOPE: "qwen/Qwen2-1.5B", + }, + "Qwen2-7B": { + DownloadSource.DEFAULT: "Qwen/Qwen2-7B", + DownloadSource.MODELSCOPE: "qwen/Qwen2-7B", + }, + "Qwen2-72B": { + DownloadSource.DEFAULT: "Qwen/Qwen2-72B", + DownloadSource.MODELSCOPE: "qwen/Qwen2-72B", + }, + "Qwen2-MoE-57B": { + DownloadSource.DEFAULT: "Qwen/Qwen2-57B-A14B", + DownloadSource.MODELSCOPE: "qwen/Qwen2-57B-A14B", + }, + "Qwen2-0.5B-Chat": { + DownloadSource.DEFAULT: "Qwen/Qwen2-0.5B-Instruct", + DownloadSource.MODELSCOPE: "qwen/Qwen2-0.5B-Instruct", + }, + "Qwen2-1.5B-Chat": { + DownloadSource.DEFAULT: "Qwen/Qwen2-1.5B-Instruct", + DownloadSource.MODELSCOPE: "qwen/Qwen2-1.5B-Instruct", + }, + "Qwen2-7B-Chat": { + DownloadSource.DEFAULT: "Qwen/Qwen2-7B-Instruct", + DownloadSource.MODELSCOPE: "qwen/Qwen2-7B-Instruct", + }, + "Qwen2-72B-Chat": { + DownloadSource.DEFAULT: "Qwen/Qwen2-72B-Instruct", + DownloadSource.MODELSCOPE: "qwen/Qwen2-72B-Instruct", + }, + "Qwen2-MoE-57B-Chat": { + DownloadSource.DEFAULT: "Qwen/Qwen2-57B-A14B-Instruct", + DownloadSource.MODELSCOPE: "qwen/Qwen2-57B-A14B-Instruct", + }, + "Qwen2-0.5B-int8-Chat": { + DownloadSource.DEFAULT: "Qwen/Qwen2-0.5B-Instruct-GPTQ-Int8", + DownloadSource.MODELSCOPE: "qwen/Qwen2-0.5B-Instruct-GPTQ-Int8", + }, + "Qwen2-0.5B-int4-Chat": { + DownloadSource.DEFAULT: "Qwen/Qwen2-0.5B-Instruct-AWQ", + DownloadSource.MODELSCOPE: "qwen/Qwen2-0.5B-Instruct-AWQ", + }, + "Qwen2-1.5B-int8-Chat": { + DownloadSource.DEFAULT: "Qwen/Qwen2-1.5B-Instruct-GPTQ-Int8", + DownloadSource.MODELSCOPE: "qwen/Qwen2-1.5B-Instruct-GPTQ-Int8", + }, + "Qwen2-1.5B-int4-Chat": { + DownloadSource.DEFAULT: "Qwen/Qwen2-1.5B-Instruct-AWQ", + DownloadSource.MODELSCOPE: "qwen/Qwen2-1.5B-Instruct-AWQ", + }, + "Qwen2-7B-int8-Chat": { + DownloadSource.DEFAULT: "Qwen/Qwen2-7B-Instruct-GPTQ-Int8", + DownloadSource.MODELSCOPE: "qwen/Qwen2-7B-Instruct-GPTQ-Int8", + }, + "Qwen2-7B-int4-Chat": { + DownloadSource.DEFAULT: "Qwen/Qwen2-7B-Instruct-AWQ", + DownloadSource.MODELSCOPE: "qwen/Qwen2-7B-Instruct-AWQ", + }, + "Qwen2-72B-int8-Chat": { + DownloadSource.DEFAULT: "Qwen/Qwen2-72B-Instruct-GPTQ-Int8", + DownloadSource.MODELSCOPE: "qwen/Qwen2-72B-Instruct-GPTQ-Int8", + }, + "Qwen2-72B-int4-Chat": { + DownloadSource.DEFAULT: "Qwen/Qwen2-72B-Instruct-AWQ", + DownloadSource.MODELSCOPE: "qwen/Qwen2-72B-Instruct-AWQ", + }, + "Qwen2-MoE-57B-int4-Chat": { + DownloadSource.DEFAULT: "Qwen/Qwen2-57B-A14B-Instruct-GPTQ-Int4", + DownloadSource.MODELSCOPE: "qwen/Qwen2-57B-A14B-Instruct-GPTQ-Int4", + }, + }, + template="qwen", +) + + register_model_group( models={ "SOLAR-10.7B": {