forked from p04798526/LLaMA-Factory-Mirror
add Gemma2 models
This commit is contained in:
parent
2f78b5d62a
commit
6f63050e1b
|
@@ -160,7 +160,7 @@ Compared to ChatGLM's [P-Tuning](https://github.com/THUDM/ChatGLM2-6B/tree/main/
|
||||||
| [Command-R](https://huggingface.co/CohereForAI) | 35B/104B | cohere |
|
| [Command-R](https://huggingface.co/CohereForAI) | 35B/104B | cohere |
|
||||||
| [DeepSeek (Code/MoE)](https://huggingface.co/deepseek-ai) | 7B/16B/67B/236B | deepseek |
|
| [DeepSeek (Code/MoE)](https://huggingface.co/deepseek-ai) | 7B/16B/67B/236B | deepseek |
|
||||||
| [Falcon](https://huggingface.co/tiiuae) | 7B/11B/40B/180B | falcon |
|
| [Falcon](https://huggingface.co/tiiuae) | 7B/11B/40B/180B | falcon |
|
||||||
| [Gemma/CodeGemma](https://huggingface.co/google) | 2B/7B | gemma |
|
| [Gemma/Gemma2/CodeGemma](https://huggingface.co/google) | 2B/7B/9B/27B | gemma |
|
||||||
| [GLM4](https://huggingface.co/THUDM) | 9B | glm4 |
|
| [GLM4](https://huggingface.co/THUDM) | 9B | glm4 |
|
||||||
| [InternLM2](https://huggingface.co/internlm) | 7B/20B | intern2 |
|
| [InternLM2](https://huggingface.co/internlm) | 7B/20B | intern2 |
|
||||||
| [LLaMA](https://github.com/facebookresearch/llama) | 7B/13B/33B/65B | - |
|
| [LLaMA](https://github.com/facebookresearch/llama) | 7B/13B/33B/65B | - |
|
||||||
|
|
|
@@ -160,7 +160,7 @@ https://github.com/hiyouga/LLaMA-Factory/assets/16256802/ec36a9dd-37f4-4f72-81bd
|
||||||
| [Command-R](https://huggingface.co/CohereForAI) | 35B/104B | cohere |
|
| [Command-R](https://huggingface.co/CohereForAI) | 35B/104B | cohere |
|
||||||
| [DeepSeek (Code/MoE)](https://huggingface.co/deepseek-ai) | 7B/16B/67B/236B | deepseek |
|
| [DeepSeek (Code/MoE)](https://huggingface.co/deepseek-ai) | 7B/16B/67B/236B | deepseek |
|
||||||
| [Falcon](https://huggingface.co/tiiuae) | 7B/11B/40B/180B | falcon |
|
| [Falcon](https://huggingface.co/tiiuae) | 7B/11B/40B/180B | falcon |
|
||||||
| [Gemma/CodeGemma](https://huggingface.co/google) | 2B/7B | gemma |
|
| [Gemma/Gemma2/CodeGemma](https://huggingface.co/google) | 2B/7B/9B/27B | gemma |
|
||||||
| [GLM4](https://huggingface.co/THUDM) | 9B | glm4 |
|
| [GLM4](https://huggingface.co/THUDM) | 9B | glm4 |
|
||||||
| [InternLM2](https://huggingface.co/internlm) | 7B/20B | intern2 |
|
| [InternLM2](https://huggingface.co/internlm) | 7B/20B | intern2 |
|
||||||
| [LLaMA](https://github.com/facebookresearch/llama) | 7B/13B/33B/65B | - |
|
| [LLaMA](https://github.com/facebookresearch/llama) | 7B/13B/33B/65B | - |
|
||||||
|
|
|
@@ -507,6 +507,18 @@ register_model_group(
|
||||||
"Gemma-1.1-7B-Chat": {
|
"Gemma-1.1-7B-Chat": {
|
||||||
DownloadSource.DEFAULT: "google/gemma-1.1-7b-it",
|
DownloadSource.DEFAULT: "google/gemma-1.1-7b-it",
|
||||||
},
|
},
|
||||||
|
"Gemma-2-9B": {
|
||||||
|
DownloadSource.DEFAULT: "google/gemma-2-9b",
|
||||||
|
},
|
||||||
|
"Gemma-2-27B": {
|
||||||
|
DownloadSource.DEFAULT: "google/gemma-2-27b",
|
||||||
|
},
|
||||||
|
"Gemma-2-9B-Chat": {
|
||||||
|
DownloadSource.DEFAULT: "google/gemma-2-9b-it",
|
||||||
|
},
|
||||||
|
"Gemma-2-27B-Chat": {
|
||||||
|
DownloadSource.DEFAULT: "google/gemma-2-27b-it",
|
||||||
|
},
|
||||||
},
|
},
|
||||||
template="gemma",
|
template="gemma",
|
||||||
)
|
)
|
||||||
|
|
|
@@ -105,7 +105,7 @@ class PairwiseTrainer(Trainer):
|
||||||
Subclass and override to inject custom behavior.
|
Subclass and override to inject custom behavior.
|
||||||
|
|
||||||
Note that the first element will be removed from the output tuple.
|
Note that the first element will be removed from the output tuple.
|
||||||
See: https://github.com/huggingface/transformers/blob/v4.39.1/src/transformers/trainer.py#L3777
|
See: https://github.com/huggingface/transformers/blob/v4.40.0/src/transformers/trainer.py#L3842
|
||||||
"""
|
"""
|
||||||
# Compute rewards
|
# Compute rewards
|
||||||
_, _, values = model(**inputs, output_hidden_states=True, return_dict=True)
|
_, _, values = model(**inputs, output_hidden_states=True, return_dict=True)
|
||||||
|
|
Loading…
Reference in New Issue