add Gemma2 models

2024-06-28 01:26:50 +08:00 · 2024-06-28 01:26:50 +08:00 · 6f63050e1b
parent 2f78b5d62a
commit 6f63050e1b
4 changed files with 15 additions and 3 deletions
--- a/README.md
+++ b/README.md
@@ -160,7 +160,7 @@ Compared to ChatGLM's [P-Tuning](https://github.com/THUDM/ChatGLM2-6B/tree/main/
 | [Command-R](https://huggingface.co/CohereForAI)           | 35B/104B                         | cohere    |
 | [DeepSeek (Code/MoE)](https://huggingface.co/deepseek-ai) | 7B/16B/67B/236B                  | deepseek  |
 | [Falcon](https://huggingface.co/tiiuae)                   | 7B/11B/40B/180B                  | falcon    |
-| [Gemma/CodeGemma](https://huggingface.co/google)          | 2B/7B                            | gemma     |
+| [Gemma/Gemma2/CodeGemma](https://huggingface.co/google)   | 2B/7B/9B/27B                     | gemma     |
 | [GLM4](https://huggingface.co/THUDM)                      | 9B                               | glm4      |
 | [InternLM2](https://huggingface.co/internlm)              | 7B/20B                           | intern2   |
 | [LLaMA](https://github.com/facebookresearch/llama)        | 7B/13B/33B/65B                   | -         |
--- a/README_zh.md
+++ b/README_zh.md
@@ -160,7 +160,7 @@ https://github.com/hiyouga/LLaMA-Factory/assets/16256802/ec36a9dd-37f4-4f72-81bd
 | [Command-R](https://huggingface.co/CohereForAI)           | 35B/104B                         | cohere    |
 | [DeepSeek (Code/MoE)](https://huggingface.co/deepseek-ai) | 7B/16B/67B/236B                  | deepseek  |
 | [Falcon](https://huggingface.co/tiiuae)                   | 7B/11B/40B/180B                  | falcon    |
-| [Gemma/CodeGemma](https://huggingface.co/google)          | 2B/7B                            | gemma     |
+| [Gemma/Gemma2/CodeGemma](https://huggingface.co/google)   | 2B/7B/9B/27B                     | gemma     |
 | [GLM4](https://huggingface.co/THUDM)                      | 9B                               | glm4      |
 | [InternLM2](https://huggingface.co/internlm)              | 7B/20B                           | intern2   |
 | [LLaMA](https://github.com/facebookresearch/llama)        | 7B/13B/33B/65B                   | -         |
--- a/src/llamafactory/extras/constants.py
+++ b/src/llamafactory/extras/constants.py
@@ -507,6 +507,18 @@ register_model_group(
        "Gemma-1.1-7B-Chat": {
            DownloadSource.DEFAULT: "google/gemma-1.1-7b-it",
        },
+        "Gemma-2-9B": {
+            DownloadSource.DEFAULT: "google/gemma-2-9b",
+        },
+        "Gemma-2-27B": {
+            DownloadSource.DEFAULT: "google/gemma-2-27b",
+        },
+        "Gemma-2-9B-Chat": {
+            DownloadSource.DEFAULT: "google/gemma-2-9b-it",
+        },
+        "Gemma-2-27B-Chat": {
+            DownloadSource.DEFAULT: "google/gemma-2-27b-it",
+        },
    },
    template="gemma",
 )
--- a/src/llamafactory/train/rm/trainer.py
+++ b/src/llamafactory/train/rm/trainer.py
@@ -105,7 +105,7 @@ class PairwiseTrainer(Trainer):
        Subclass and override to inject custom behavior.

        Note that the first element will be removed from the output tuple.
-        See: https://github.com/huggingface/transformers/blob/v4.39.1/src/transformers/trainer.py#L3777
+        See: https://github.com/huggingface/transformers/blob/v4.40.0/src/transformers/trainer.py#L3842
        """
        # Compute rewards
        _, _, values = model(**inputs, output_hidden_states=True, return_dict=True)