add Gemma2 models

This commit is contained in:
hiyouga 2024-06-28 01:26:50 +08:00
parent 2f78b5d62a
commit 6f63050e1b
4 changed files with 15 additions and 3 deletions

View File

@@ -160,7 +160,7 @@ Compared to ChatGLM's [P-Tuning](https://github.com/THUDM/ChatGLM2-6B/tree/main/
| [Command-R](https://huggingface.co/CohereForAI) | 35B/104B | cohere | | [Command-R](https://huggingface.co/CohereForAI) | 35B/104B | cohere |
| [DeepSeek (Code/MoE)](https://huggingface.co/deepseek-ai) | 7B/16B/67B/236B | deepseek | | [DeepSeek (Code/MoE)](https://huggingface.co/deepseek-ai) | 7B/16B/67B/236B | deepseek |
| [Falcon](https://huggingface.co/tiiuae) | 7B/11B/40B/180B | falcon | | [Falcon](https://huggingface.co/tiiuae) | 7B/11B/40B/180B | falcon |
| [Gemma/CodeGemma](https://huggingface.co/google) | 2B/7B | gemma | | [Gemma/Gemma2/CodeGemma](https://huggingface.co/google) | 2B/7B/9B/27B | gemma |
| [GLM4](https://huggingface.co/THUDM) | 9B | glm4 | | [GLM4](https://huggingface.co/THUDM) | 9B | glm4 |
| [InternLM2](https://huggingface.co/internlm) | 7B/20B | intern2 | | [InternLM2](https://huggingface.co/internlm) | 7B/20B | intern2 |
| [LLaMA](https://github.com/facebookresearch/llama) | 7B/13B/33B/65B | - | | [LLaMA](https://github.com/facebookresearch/llama) | 7B/13B/33B/65B | - |

View File

@@ -160,7 +160,7 @@ https://github.com/hiyouga/LLaMA-Factory/assets/16256802/ec36a9dd-37f4-4f72-81bd
| [Command-R](https://huggingface.co/CohereForAI) | 35B/104B | cohere | | [Command-R](https://huggingface.co/CohereForAI) | 35B/104B | cohere |
| [DeepSeek (Code/MoE)](https://huggingface.co/deepseek-ai) | 7B/16B/67B/236B | deepseek | | [DeepSeek (Code/MoE)](https://huggingface.co/deepseek-ai) | 7B/16B/67B/236B | deepseek |
| [Falcon](https://huggingface.co/tiiuae) | 7B/11B/40B/180B | falcon | | [Falcon](https://huggingface.co/tiiuae) | 7B/11B/40B/180B | falcon |
| [Gemma/CodeGemma](https://huggingface.co/google) | 2B/7B | gemma | | [Gemma/Gemma2/CodeGemma](https://huggingface.co/google) | 2B/7B/9B/27B | gemma |
| [GLM4](https://huggingface.co/THUDM) | 9B | glm4 | | [GLM4](https://huggingface.co/THUDM) | 9B | glm4 |
| [InternLM2](https://huggingface.co/internlm) | 7B/20B | intern2 | | [InternLM2](https://huggingface.co/internlm) | 7B/20B | intern2 |
| [LLaMA](https://github.com/facebookresearch/llama) | 7B/13B/33B/65B | - | | [LLaMA](https://github.com/facebookresearch/llama) | 7B/13B/33B/65B | - |

View File

@@ -507,6 +507,18 @@ register_model_group(
"Gemma-1.1-7B-Chat": { "Gemma-1.1-7B-Chat": {
DownloadSource.DEFAULT: "google/gemma-1.1-7b-it", DownloadSource.DEFAULT: "google/gemma-1.1-7b-it",
}, },
"Gemma-2-9B": {
DownloadSource.DEFAULT: "google/gemma-2-9b",
},
"Gemma-2-27B": {
DownloadSource.DEFAULT: "google/gemma-2-27b",
},
"Gemma-2-9B-Chat": {
DownloadSource.DEFAULT: "google/gemma-2-9b-it",
},
"Gemma-2-27B-Chat": {
DownloadSource.DEFAULT: "google/gemma-2-27b-it",
},
}, },
template="gemma", template="gemma",
) )

View File

@@ -105,7 +105,7 @@ class PairwiseTrainer(Trainer):
Subclass and override to inject custom behavior. Subclass and override to inject custom behavior.
Note that the first element will be removed from the output tuple. Note that the first element will be removed from the output tuple.
See: https://github.com/huggingface/transformers/blob/v4.39.1/src/transformers/trainer.py#L3777 See: https://github.com/huggingface/transformers/blob/v4.40.0/src/transformers/trainer.py#L3842
""" """
# Compute rewards # Compute rewards
_, _, values = model(**inputs, output_hidden_states=True, return_dict=True) _, _, values = model(**inputs, output_hidden_states=True, return_dict=True)