add models
Commit 509abe8864 (parent 9d38e5687d)
README.md
@@ -92,7 +92,7 @@ Compared to ChatGLM's [P-Tuning](https://github.com/THUDM/ChatGLM2-6B/tree/main/
 | [LLaMA-2](https://huggingface.co/meta-llama) | 7B/13B/70B | q_proj,v_proj | llama2 |
 | [Mistral](https://huggingface.co/mistralai) | 7B | q_proj,v_proj | mistral |
 | [Phi-1.5](https://huggingface.co/microsoft/phi-1_5) | 1.3B | Wqkv | - |
-| [Qwen](https://github.com/QwenLM/Qwen) | 7B/14B | c_attn | qwen |
+| [Qwen](https://github.com/QwenLM/Qwen) | 1.8B/7B/14B/72B | c_attn | qwen |
 | [XVERSE](https://github.com/xverse-ai) | 7B/13B/65B | q_proj,v_proj | xverse |
 
 > [!NOTE]
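For context, the third column of this table is the default LoRA target module per architecture: Qwen fuses the query/key/value projections into a single `c_attn` matrix, which is why it lists `c_attn` where LLaMA-style models list `q_proj,v_proj`. A minimal sketch of that difference using plain `peft`/`transformers` (illustrative only, not LLaMA-Factory's internal code path):

```python
# Illustrative sketch: attaching LoRA to Qwen's fused attention projection.
# Assumes peft and transformers are installed; not LLaMA-Factory's own code.
from transformers import AutoModelForCausalLM
from peft import LoraConfig, get_peft_model

model = AutoModelForCausalLM.from_pretrained("Qwen/Qwen-1_8B", trust_remote_code=True)

# Qwen keeps Q/K/V in one fused "c_attn" matrix, so this single module name
# plays the role of the usual q_proj,v_proj pair on LLaMA-style models.
lora_config = LoraConfig(r=8, lora_alpha=16, target_modules=["c_attn"])
model = get_peft_model(model, lora_config)
model.print_trainable_parameters()
```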
@@ -199,8 +199,8 @@ huggingface-cli login
 | Full | 16 | 140GB | 240GB | 520GB | 1200GB |
 | Freeze | 16 | 20GB | 40GB | 120GB | 240GB |
 | LoRA | 16 | 16GB | 32GB | 80GB | 160GB |
-| LoRA | 8 | 10GB | 16GB | 40GB | 80GB |
-| LoRA | 4 | 6GB | 12GB | 24GB | 48GB |
+| QLoRA | 8 | 10GB | 16GB | 40GB | 80GB |
+| QLoRA | 4 | 6GB | 12GB | 24GB | 48GB |
 
 ## Getting Started
 
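These numbers line up with the usual back-of-envelope estimate: full 16-bit fine-tuning with mixed-precision AdamW holds roughly 18-20 bytes per parameter (fp16 weights and gradients, an fp32 master copy, and two fp32 optimizer moments), while QLoRA keeps only the quantized base weights plus a small adapter and its optimizer state. A rough sanity check under those assumptions (the per-parameter byte count is an estimate, not a figure from this repo):

```python
# Back-of-envelope estimate for the "Full, 16-bit" row, assuming mixed-precision
# AdamW: 2 B fp16 weights + 2 B fp16 grads + 4 B fp32 master copy
# + 4 B + 4 B Adam moments = 16 B/param; activations and buffers push this
# to roughly 18-20 B/param in practice.
def full_finetune_gb(params_in_billions: float, bytes_per_param: float = 20.0) -> float:
    """Approximate peak training memory in GB (decimal)."""
    return params_in_billions * bytes_per_param

print(full_finetune_gb(7))   # ~140 GB, matching the table's first "Full" cell
print(full_finetune_gb(13))  # ~260 GB; the table's 240GB implies ~18.5 B/param
```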
README_zh.md
@@ -92,7 +92,7 @@ https://github.com/hiyouga/LLaMA-Factory/assets/16256802/6ba60acc-e2e2-4bec-b846
 | [LLaMA-2](https://huggingface.co/meta-llama) | 7B/13B/70B | q_proj,v_proj | llama2 |
 | [Mistral](https://huggingface.co/mistralai) | 7B | q_proj,v_proj | mistral |
 | [Phi-1.5](https://huggingface.co/microsoft/phi-1_5) | 1.3B | Wqkv | - |
-| [Qwen](https://github.com/QwenLM/Qwen) | 7B/14B | c_attn | qwen |
+| [Qwen](https://github.com/QwenLM/Qwen) | 1.8B/7B/14B/72B | c_attn | qwen |
 | [XVERSE](https://github.com/xverse-ai) | 7B/13B/65B | q_proj,v_proj | xverse |
 
 > [!NOTE]
@@ -199,8 +199,8 @@ huggingface-cli login
 | Full | 16 | 140GB | 240GB | 520GB | 1200GB |
 | Freeze | 16 | 20GB | 40GB | 120GB | 240GB |
 | LoRA | 16 | 16GB | 32GB | 80GB | 160GB |
-| LoRA | 8 | 10GB | 16GB | 40GB | 80GB |
-| LoRA | 4 | 6GB | 12GB | 24GB | 48GB |
+| QLoRA | 8 | 10GB | 16GB | 40GB | 80GB |
+| QLoRA | 4 | 6GB | 12GB | 24GB | 48GB |
 
 ## Getting Started
 
src/llmtuner/extras/template.py
@@ -408,18 +408,31 @@ register_template(
         "{{system}}"
     ],
     prompt=[
-        "### Instruction:\n{{query}}\n\n### Response:\n"
+        "User: {{query}}\n\nAssistant:"
+    ],
+    system="",
+    sep=[]
+)
+
+
+register_template(
+    name="deepseekcoder",
+    prefix=[
+        "{{system}}"
+    ],
+    prompt=[
+        "### Instruction:\n{{query}}\n### Response:\n"
     ],
     system=(
         "You are an AI programming assistant, utilizing the Deepseek Coder model, "
         "developed by Deepseek Company, and you only answer questions related to computer science. "
         "For politically sensitive questions, security and privacy issues, "
-        "and other non-computer science questions, you will refuse to answer."
+        "and other non-computer science questions, you will refuse to answer\n"
     ),
     sep=[
         "\n",
         {"token": "<|EOT|>"},
-        "\n\n"
+        "\n"
     ],
     stop_words=[
         "<|EOT|>"
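To make the split concrete: the chat model now gets a plain User/Assistant layout ("deepseek"), while the coder model keeps Alpaca-style headers plus its system prompt ("deepseekcoder"). A standalone sketch of the strings the two layouts produce; `render` is a hypothetical helper written for this example, and LLaMA-Factory's real encoder additionally interleaves the `sep` tokens and multi-turn history:

```python
# Hypothetical renderer mimicking the template fields above; not the actual
# LLaMA-Factory API. It only substitutes the {{system}} and {{query}} slots.
def render(prefix: str, prompt: str, system: str, query: str) -> str:
    return prefix.replace("{{system}}", system) + prompt.replace("{{query}}", query)

# "deepseek" chat template: plain User/Assistant turns, empty system prompt.
print(render("{{system}}", "User: {{query}}\n\nAssistant:", "", "Hello"))

# "deepseekcoder": Alpaca-style headers with the coder system prompt prepended.
print(render("{{system}}", "### Instruction:\n{{query}}\n### Response:\n",
             "You are an AI programming assistant, ...", "Write quicksort."))
```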
src/llmtuner/extras/constants.py
@@ -131,6 +131,28 @@ register_model_group(
 )
 
 
+register_model_group(
+    models={
+        "DeepseekLLM-7B-Base": "deepseek-ai/deepseek-llm-7b-base",
+        "DeepseekLLM-67B-Base": "deepseek-ai/deepseek-llm-67b-base",
+        "DeepseekLLM-7B-Chat": "deepseek-ai/deepseek-llm-7b-chat",
+        "DeepseekLLM-67B-Chat": "deepseek-ai/deepseek-llm-67b-chat"
+    },
+    template="deepseek"
+)
+
+
+register_model_group(
+    models={
+        "DeepseekCoder-6.7B-Base": "deepseek-ai/deepseek-coder-6.7b-base",
+        "DeepseekCoder-6.7B-Chat": "deepseek-ai/deepseek-coder-6.7b-instruct",
+        "DeepseekCoder-33B-Base": "deepseek-ai/deepseek-coder-33b-base",
+        "DeepseekCoder-33B-Chat": "deepseek-ai/deepseek-coder-33b-instruct"
+    },
+    template="deepseekcoder"
+)
+
+
 register_model_group(
     models={
         "Falcon-7B": "tiiuae/falcon-7b",
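Each `register_model_group` call maps a display name to a Hugging Face Hub ID and ties the group to its chat template, which is how the new Deepseek checkpoints surface in the UI and CLI. A minimal check with plain `transformers` (not LLaMA-Factory code) that one of the new IDs resolves:

```python
# Minimal sketch: load one of the newly registered checkpoints directly from
# the Hub to verify the ID in the mapping above.
from transformers import AutoModelForCausalLM, AutoTokenizer

model_id = "deepseek-ai/deepseek-llm-7b-chat"  # value from the registry above
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto")
```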
@@ -214,14 +236,22 @@ register_model_group(
 
 register_model_group(
     models={
+        "Qwen-1.8B": "Qwen/Qwen-1_8B",
         "Qwen-7B": "Qwen/Qwen-7B",
         "Qwen-14B": "Qwen/Qwen-14B",
+        "Qwen-72B": "Qwen/Qwen-72B",
+        "Qwen-1.8B-Chat": "Qwen/Qwen-1_8B-Chat",
         "Qwen-7B-Chat": "Qwen/Qwen-7B-Chat",
         "Qwen-14B-Chat": "Qwen/Qwen-14B-Chat",
+        "Qwen-72B-Chat": "Qwen/Qwen-72B-Chat",
+        "Qwen-1.8B-int8-Chat": "Qwen/Qwen-1_8B-Chat-Int8",
+        "Qwen-1.8B-int4-Chat": "Qwen/Qwen-1_8B-Chat-Int4",
         "Qwen-7B-int8-Chat": "Qwen/Qwen-7B-Chat-Int8",
         "Qwen-7B-int4-Chat": "Qwen/Qwen-7B-Chat-Int4",
         "Qwen-14B-int8-Chat": "Qwen/Qwen-14B-Chat-Int8",
-        "Qwen-14B-int4-Chat": "Qwen/Qwen-14B-Chat-Int4"
+        "Qwen-14B-int4-Chat": "Qwen/Qwen-14B-Chat-Int4",
+        "Qwen-72B-int8-Chat": "Qwen/Qwen-72B-Chat-Int8",
+        "Qwen-72B-int4-Chat": "Qwen/Qwen-72B-Chat-Int4"
     },
     module="c_attn",
     template="qwen"
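The Int8/Int4 entries point at Qwen's pre-quantized GPTQ releases, which load like any other checkpoint but need the GPTQ runtime. A hedged loading sketch; it assumes `auto-gptq` and `optimum` are installed, as Qwen's model cards require, and `trust_remote_code` because Qwen ships custom modeling code:

```python
# Sketch: loading one of the pre-quantized Qwen chat models registered above.
# Assumes auto-gptq + optimum are installed (per the Qwen model cards).
from transformers import AutoModelForCausalLM, AutoTokenizer

model_id = "Qwen/Qwen-1_8B-Chat-Int4"  # one of the new registry entries
tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    model_id, device_map="auto", trust_remote_code=True
)
```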