lint

commit 88a1bc9773
parent aacc823b0f
@@ -14,7 +14,7 @@ indent-width = 4
 ignore = ["C408", "C901", "E501", "E731", "E741", "W605"]
 select = ["C", "E", "F", "I", "W"]
 
-[tool.ruff.isort]
+[tool.ruff.lint.isort]
 lines-after-imports = 2
 known-first-party = ["llmtuner"]
 
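This hunk renames the [tool.ruff.isort] table to [tool.ruff.lint.isort], the namespace newer Ruff releases expect for lint settings; the option values are unchanged. With known-first-party = ["llmtuner"] and lines-after-imports = 2, the import sorter keeps llmtuner imports in their own first-party group and leaves two blank lines after the import block. A minimal sketch of the resulting layout (the module path llmtuner.extras.constants and the toy function are assumptions for illustration):

    from typing import Optional

    from llmtuner.extras.constants import IGNORE_INDEX


    def label_pad_value(use_ignore_index: bool = True) -> Optional[int]:
        # Toy function; it exists only to show the import grouping and the two
        # blank lines that the isort settings above produce.
        return IGNORE_INDEX if use_ignore_index else None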
@@ -75,8 +75,7 @@ class Formatter(ABC):
     tool_format: Literal["default"] = "default"
 
     @abstractmethod
-    def apply(self, **kwargs) -> SLOTS:
-        ...
+    def apply(self, **kwargs) -> SLOTS: ...
 
     def extract(self, content: str) -> Union[str, Tuple[str, str]]:
         raise NotImplementedError
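Ruff's formatter collapses a stub whose only body is "..." onto the def line, so the two removed lines become the single def apply(self, **kwargs) -> SLOTS: ... shown above. The method is still abstract. A minimal runnable sketch of the pattern; the SLOTS alias and the DummyFormatter subclass are stand-ins, not the project's real definitions:

    from abc import ABC, abstractmethod
    from typing import List, Tuple, Union

    SLOTS = List[str]  # stand-in for the project's real SLOTS type alias


    class Formatter(ABC):
        @abstractmethod
        def apply(self, **kwargs) -> SLOTS: ...

        def extract(self, content: str) -> Union[str, Tuple[str, str]]:
            raise NotImplementedError


    class DummyFormatter(Formatter):
        def apply(self, **kwargs) -> SLOTS:
            # Subclasses must still provide a real body for the abstract stub.
            return [f"{key}={value}" for key, value in kwargs.items()]


    print(DummyFormatter().apply(role="user"))  # -> ['role=user']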
@@ -11,7 +11,14 @@ DEFAULT_MODULE = defaultdict(str)
 
 DEFAULT_TEMPLATE = defaultdict(str)
 
-FILEEXT2TYPE = {"arrow": "arrow", "csv": "csv", "json": "json", "jsonl": "json", "parquet": "parquet", "txt": "text"}
+FILEEXT2TYPE = {
+    "arrow": "arrow",
+    "csv": "csv",
+    "json": "json",
+    "jsonl": "json",
+    "parquet": "parquet",
+    "txt": "text",
+}
 
 IGNORE_INDEX = -100
 
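The one-line FILEEXT2TYPE literal is reflowed to one entry per line with a trailing comma, which keeps later additions to one-line diffs; the mapping itself is identical. A short sketch of the kind of lookup such a table supports (the guess_data_format helper is illustrative, not the repository's loader code):

    import os
    from typing import Optional

    FILEEXT2TYPE = {
        "arrow": "arrow",
        "csv": "csv",
        "json": "json",
        "jsonl": "json",
        "parquet": "parquet",
        "txt": "text",
    }


    def guess_data_format(file_name: str) -> Optional[str]:
        # Map a dataset file's extension to a loader type, e.g. "jsonl" -> "json";
        # unknown extensions return None.
        ext = os.path.splitext(file_name)[-1].lstrip(".").lower()
        return FILEEXT2TYPE.get(ext)


    print(guess_data_format("alpaca_data.jsonl"))  # -> json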
@@ -46,7 +53,9 @@ class DownloadSource(str, Enum):
 
 
 def register_model_group(
-    models: Dict[str, Dict[DownloadSource, str]], module: Optional[str] = None, template: Optional[str] = None
+    models: Dict[str, Dict[DownloadSource, str]],
+    module: Optional[str] = None,
+    template: Optional[str] = None,
 ) -> None:
     prefix = None
     for name, path in models.items():
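Breaking the signature into one parameter per line, with a trailing comma, keeps it within the configured line length and stable under the formatter. A self-contained sketch of how such a registry function can be defined and called; this is a simplified re-implementation with made-up model entries, not the repository's actual code:

    from collections import defaultdict
    from enum import Enum
    from typing import Dict, Optional


    class DownloadSource(str, Enum):
        DEFAULT = "hf"  # values are illustrative
        MODELSCOPE = "ms"


    SUPPORTED_MODELS: Dict[str, Dict[DownloadSource, str]] = {}
    DEFAULT_MODULE = defaultdict(str)
    DEFAULT_TEMPLATE = defaultdict(str)


    def register_model_group(
        models: Dict[str, Dict[DownloadSource, str]],
        module: Optional[str] = None,
        template: Optional[str] = None,
    ) -> None:
        # Simplified: record each model's download paths and, when given,
        # its target module and default chat template.
        for name, path in models.items():
            SUPPORTED_MODELS[name] = path
            if module is not None:
                DEFAULT_MODULE[name] = module
            if template is not None:
                DEFAULT_TEMPLATE[name] = template


    register_model_group(
        models={
            "Example-7B-Chat": {  # hypothetical entry, not part of the real registry
                DownloadSource.DEFAULT: "example-org/example-7b-chat",
                DownloadSource.MODELSCOPE: "example-org/example-7b-chat",
            },
        },
        template="example",
    )
    print(SUPPORTED_MODELS["Example-7B-Chat"][DownloadSource.DEFAULT])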
@@ -235,8 +244,12 @@ register_model_group(
             DownloadSource.DEFAULT: "deepseek-ai/deepseek-llm-67b-chat",
             DownloadSource.MODELSCOPE: "deepseek-ai/deepseek-llm-67b-chat",
         },
-        "DeepSeek-Math-7B-Base": {DownloadSource.DEFAULT: "deepseek-ai/deepseek-math-7b-base"},
-        "DeepSeek-Math-7B-Chat": {DownloadSource.DEFAULT: "deepseek-ai/deepseek-math-7b-instruct"},
+        "DeepSeek-Math-7B-Base": {
+            DownloadSource.DEFAULT: "deepseek-ai/deepseek-math-7b-base",
+        },
+        "DeepSeek-Math-7B-Chat": {
+            DownloadSource.DEFAULT: "deepseek-ai/deepseek-math-7b-instruct",
+        },
         "DeepSeek-MoE-16B-Base": {
             DownloadSource.DEFAULT: "deepseek-ai/deepseek-moe-16b-base",
             DownloadSource.MODELSCOPE: "deepseek-ai/deepseek-moe-16b-base",
@@ -256,7 +269,9 @@ register_model_group(
             DownloadSource.DEFAULT: "deepseek-ai/deepseek-coder-6.7b-base",
             DownloadSource.MODELSCOPE: "deepseek-ai/deepseek-coder-6.7b-base",
         },
-        "DeepSeekCoder-7B-Base": {DownloadSource.DEFAULT: "deepseek-ai/deepseek-coder-7b-base-v1.5"},
+        "DeepSeekCoder-7B-Base": {
+            DownloadSource.DEFAULT: "deepseek-ai/deepseek-coder-7b-base-v1.5",
+        },
         "DeepSeekCoder-33B-Base": {
             DownloadSource.DEFAULT: "deepseek-ai/deepseek-coder-33b-base",
             DownloadSource.MODELSCOPE: "deepseek-ai/deepseek-coder-33b-base",
@@ -265,7 +280,9 @@ register_model_group(
             DownloadSource.DEFAULT: "deepseek-ai/deepseek-coder-6.7b-instruct",
             DownloadSource.MODELSCOPE: "deepseek-ai/deepseek-coder-6.7b-instruct",
         },
-        "DeepSeekCoder-7B-Chat": {DownloadSource.DEFAULT: "deepseek-ai/deepseek-coder-7b-instruct-v1.5"},
+        "DeepSeekCoder-7B-Chat": {
+            DownloadSource.DEFAULT: "deepseek-ai/deepseek-coder-7b-instruct-v1.5",
+        },
         "DeepSeekCoder-33B-Chat": {
             DownloadSource.DEFAULT: "deepseek-ai/deepseek-coder-33b-instruct",
             DownloadSource.MODELSCOPE: "deepseek-ai/deepseek-coder-33b-instruct",
@@ -367,7 +384,10 @@ register_model_group(
 
 register_model_group(
     models={
-        "LLaMA-7B": {DownloadSource.DEFAULT: "huggyllama/llama-7b", DownloadSource.MODELSCOPE: "skyline2006/llama-7b"},
+        "LLaMA-7B": {
+            DownloadSource.DEFAULT: "huggyllama/llama-7b",
+            DownloadSource.MODELSCOPE: "skyline2006/llama-7b",
+        },
         "LLaMA-13B": {
             DownloadSource.DEFAULT: "huggyllama/llama-13b",
             DownloadSource.MODELSCOPE: "skyline2006/llama-13b",
@@ -489,18 +509,36 @@ register_model_group(
 
 register_model_group(
     models={
-        "Phi-1.5-1.3B": {DownloadSource.DEFAULT: "microsoft/phi-1_5", DownloadSource.MODELSCOPE: "allspace/PHI_1-5"},
-        "Phi-2-2.7B": {DownloadSource.DEFAULT: "microsoft/phi-2", DownloadSource.MODELSCOPE: "AI-ModelScope/phi-2"},
+        "Phi-1.5-1.3B": {
+            DownloadSource.DEFAULT: "microsoft/phi-1_5",
+            DownloadSource.MODELSCOPE: "allspace/PHI_1-5",
+        },
+        "Phi-2-2.7B": {
+            DownloadSource.DEFAULT: "microsoft/phi-2",
+            DownloadSource.MODELSCOPE: "AI-ModelScope/phi-2",
+        },
     }
 )
 
 
 register_model_group(
     models={
-        "Qwen-1.8B": {DownloadSource.DEFAULT: "Qwen/Qwen-1_8B", DownloadSource.MODELSCOPE: "qwen/Qwen-1_8B"},
-        "Qwen-7B": {DownloadSource.DEFAULT: "Qwen/Qwen-7B", DownloadSource.MODELSCOPE: "qwen/Qwen-7B"},
-        "Qwen-14B": {DownloadSource.DEFAULT: "Qwen/Qwen-14B", DownloadSource.MODELSCOPE: "qwen/Qwen-14B"},
-        "Qwen-72B": {DownloadSource.DEFAULT: "Qwen/Qwen-72B", DownloadSource.MODELSCOPE: "qwen/Qwen-72B"},
+        "Qwen-1.8B": {
+            DownloadSource.DEFAULT: "Qwen/Qwen-1_8B",
+            DownloadSource.MODELSCOPE: "qwen/Qwen-1_8B",
+        },
+        "Qwen-7B": {
+            DownloadSource.DEFAULT: "Qwen/Qwen-7B",
+            DownloadSource.MODELSCOPE: "qwen/Qwen-7B",
+        },
+        "Qwen-14B": {
+            DownloadSource.DEFAULT: "Qwen/Qwen-14B",
+            DownloadSource.MODELSCOPE: "qwen/Qwen-14B",
+        },
+        "Qwen-72B": {
+            DownloadSource.DEFAULT: "Qwen/Qwen-72B",
+            DownloadSource.MODELSCOPE: "qwen/Qwen-72B",
+        },
         "Qwen-1.8B-Chat": {
             DownloadSource.DEFAULT: "Qwen/Qwen-1_8B-Chat",
             DownloadSource.MODELSCOPE: "qwen/Qwen-1_8B-Chat",
@@ -657,7 +695,9 @@ register_model_group(
 
 register_model_group(
     models={
-        "SOLAR-10.7B": {DownloadSource.DEFAULT: "upstage/SOLAR-10.7B-v1.0"},
+        "SOLAR-10.7B": {
+            DownloadSource.DEFAULT: "upstage/SOLAR-10.7B-v1.0",
+        },
         "SOLAR-10.7B-Chat": {
             DownloadSource.DEFAULT: "upstage/SOLAR-10.7B-Instruct-v1.0",
             DownloadSource.MODELSCOPE: "AI-ModelScope/SOLAR-10.7B-Instruct-v1.0",
@@ -694,10 +734,18 @@ register_model_group(
 
 register_model_group(
     models={
-        "XuanYuan-70B": {DownloadSource.DEFAULT: "Duxiaoman-DI/XuanYuan-70B"},
-        "XuanYuan-70B-Chat": {DownloadSource.DEFAULT: "Duxiaoman-DI/XuanYuan-70B-Chat"},
-        "XuanYuan-70B-int8-Chat": {DownloadSource.DEFAULT: "Duxiaoman-DI/XuanYuan-70B-Chat-8bit"},
-        "XuanYuan-70B-int4-Chat": {DownloadSource.DEFAULT: "Duxiaoman-DI/XuanYuan-70B-Chat-4bit"},
+        "XuanYuan-70B": {
+            DownloadSource.DEFAULT: "Duxiaoman-DI/XuanYuan-70B",
+        },
+        "XuanYuan-70B-Chat": {
+            DownloadSource.DEFAULT: "Duxiaoman-DI/XuanYuan-70B-Chat",
+        },
+        "XuanYuan-70B-int8-Chat": {
+            DownloadSource.DEFAULT: "Duxiaoman-DI/XuanYuan-70B-Chat-8bit",
+        },
+        "XuanYuan-70B-int4-Chat": {
+            DownloadSource.DEFAULT: "Duxiaoman-DI/XuanYuan-70B-Chat-4bit",
+        },
     },
     template="xuanyuan",
 )
@@ -705,9 +753,18 @@ register_model_group(
 
 register_model_group(
     models={
-        "XVERSE-7B": {DownloadSource.DEFAULT: "xverse/XVERSE-7B", DownloadSource.MODELSCOPE: "xverse/XVERSE-7B"},
-        "XVERSE-13B": {DownloadSource.DEFAULT: "xverse/XVERSE-13B", DownloadSource.MODELSCOPE: "xverse/XVERSE-13B"},
-        "XVERSE-65B": {DownloadSource.DEFAULT: "xverse/XVERSE-65B", DownloadSource.MODELSCOPE: "xverse/XVERSE-65B"},
+        "XVERSE-7B": {
+            DownloadSource.DEFAULT: "xverse/XVERSE-7B",
+            DownloadSource.MODELSCOPE: "xverse/XVERSE-7B",
+        },
+        "XVERSE-13B": {
+            DownloadSource.DEFAULT: "xverse/XVERSE-13B",
+            DownloadSource.MODELSCOPE: "xverse/XVERSE-13B",
+        },
+        "XVERSE-65B": {
+            DownloadSource.DEFAULT: "xverse/XVERSE-65B",
+            DownloadSource.MODELSCOPE: "xverse/XVERSE-65B",
+        },
         "XVERSE-65B-2": {
             DownloadSource.DEFAULT: "xverse/XVERSE-65B-2",
             DownloadSource.MODELSCOPE: "xverse/XVERSE-65B-2",
@@ -746,10 +803,22 @@ register_model_group(
 
 register_model_group(
     models={
-        "Yi-6B": {DownloadSource.DEFAULT: "01-ai/Yi-6B", DownloadSource.MODELSCOPE: "01ai/Yi-6B"},
-        "Yi-34B": {DownloadSource.DEFAULT: "01-ai/Yi-34B", DownloadSource.MODELSCOPE: "01ai/Yi-34B"},
-        "Yi-6B-Chat": {DownloadSource.DEFAULT: "01-ai/Yi-6B-Chat", DownloadSource.MODELSCOPE: "01ai/Yi-6B-Chat"},
-        "Yi-34B-Chat": {DownloadSource.DEFAULT: "01-ai/Yi-34B-Chat", DownloadSource.MODELSCOPE: "01ai/Yi-34B-Chat"},
+        "Yi-6B": {
+            DownloadSource.DEFAULT: "01-ai/Yi-6B",
+            DownloadSource.MODELSCOPE: "01ai/Yi-6B",
+        },
+        "Yi-34B": {
+            DownloadSource.DEFAULT: "01-ai/Yi-34B",
+            DownloadSource.MODELSCOPE: "01ai/Yi-34B",
+        },
+        "Yi-6B-Chat": {
+            DownloadSource.DEFAULT: "01-ai/Yi-6B-Chat",
+            DownloadSource.MODELSCOPE: "01ai/Yi-6B-Chat",
+        },
+        "Yi-34B-Chat": {
+            DownloadSource.DEFAULT: "01-ai/Yi-34B-Chat",
+            DownloadSource.MODELSCOPE: "01ai/Yi-34B-Chat",
+        },
         "Yi-6B-int8-Chat": {
             DownloadSource.DEFAULT: "01-ai/Yi-6B-Chat-8bits",
             DownloadSource.MODELSCOPE: "01ai/Yi-6B-Chat-8bits",
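Most entries above carry both a Hugging Face Hub path (DEFAULT) and a ModelScope mirror (MODELSCOPE). A rough sketch of the kind of dispatch that chooses between them, gated here by an opt-in environment flag; this is illustrative only and not the repository's actual selection logic:

    import os
    from enum import Enum
    from typing import Dict


    class DownloadSource(str, Enum):
        DEFAULT = "hf"  # values are illustrative
        MODELSCOPE = "ms"


    def choose_model_path(paths: Dict[DownloadSource, str]) -> str:
        # Prefer the ModelScope mirror only when the user has opted in via an
        # environment flag (flag name assumed for this sketch).
        use_modelscope = os.environ.get("USE_MODELSCOPE_HUB", "0") == "1"
        if use_modelscope and DownloadSource.MODELSCOPE in paths:
            return paths[DownloadSource.MODELSCOPE]
        return paths[DownloadSource.DEFAULT]


    print(choose_model_path({
        DownloadSource.DEFAULT: "01-ai/Yi-6B",
        DownloadSource.MODELSCOPE: "01ai/Yi-6B",
    }))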
@@ -7,6 +7,7 @@ class DataArguments:
     r"""
     Arguments pertaining to what data we are going to input our model for training and evaluation.
     """
+
     template: Optional[str] = field(
         default=None, metadata={"help": "Which template to use for constructing prompts in training and inference."}
     )
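This hunk, like the remaining ones below, only inserts a blank line between the class docstring and the first field. The metadata={"help": ...} attached to these fields is what becomes the command-line help text when the dataclasses are handed to an argument parser such as transformers.HfArgumentParser. A minimal sketch with a stand-in dataclass rather than the real DataArguments:

    from dataclasses import dataclass, field
    from typing import Optional

    from transformers import HfArgumentParser


    @dataclass
    class ExampleArguments:  # stand-in for the real argument dataclasses
        r"""
        Arguments for this sketch only.
        """

        template: Optional[str] = field(
            default=None, metadata={"help": "Which template to use for constructing prompts."}
        )


    parser = HfArgumentParser(ExampleArguments)
    (example_args,) = parser.parse_args_into_dataclasses(args=["--template", "alpaca"])
    print(example_args.template)  # -> alpaca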
@@ -10,6 +10,7 @@ class EvaluationArguments:
     r"""
     Arguments pertaining to specify the evaluation parameters.
     """
+
     task: str = field(metadata={"help": "Name of the evaluation task."})
     task_dir: Optional[str] = field(
         default="evaluation", metadata={"help": "Path to the folder containing the evaluation datasets."}
@@ -8,6 +8,7 @@ class FreezeArguments:
     r"""
     Arguments pertaining to the freeze (partial-parameter) training.
     """
+
     name_module_trainable: Optional[str] = field(
         default="mlp",
         metadata={
@@ -31,6 +32,7 @@ class LoraArguments:
     r"""
     Arguments pertaining to the LoRA training.
     """
+
     additional_target: Optional[str] = field(
         default=None,
         metadata={
@@ -67,6 +69,7 @@ class RLHFArguments:
     r"""
     Arguments pertaining to the PPO and DPO training.
     """
+
     dpo_beta: Optional[float] = field(default=0.1, metadata={"help": "The beta parameter for the DPO loss."})
     dpo_loss: Optional[Literal["sigmoid", "hinge", "ipo", "kto"]] = field(
         default="sigmoid", metadata={"help": "The type of DPO loss to use."}
@@ -122,6 +125,7 @@ class FinetuningArguments(FreezeArguments, LoraArguments, RLHFArguments):
     r"""
     Arguments pertaining to which techniques we are going to fine-tuning with.
     """
+
     stage: Optional[Literal["pt", "sft", "rm", "ppo", "dpo"]] = field(
         default="sft", metadata={"help": "Which stage will be performed in training."}
     )
@@ -7,6 +7,7 @@ class GeneratingArguments:
     r"""
     Arguments pertaining to specify the decoding parameters.
     """
+
     do_sample: Optional[bool] = field(
         default=True, metadata={"help": "Whether or not to use sampling, use greedy decoding otherwise."}
     )
@@ -7,6 +7,7 @@ class ModelArguments:
     r"""
     Arguments pertaining to which model/config/tokenizer we are going to fine-tune.
     """
+
     model_name_or_path: str = field(
         metadata={"help": "Path to the model weight or identifier from huggingface.co/models or modelscope.cn/models."}
     )
File diff suppressed because it is too large