support autogptq in llama board #246
parent 93f64ce9a8
commit 71389be37c
@@ -482,7 +482,7 @@ python src/export_model.py \
 > Merging LoRA weights into a quantized model is not supported.
 
 > [!TIP]
-> Use `--export_quantization_bit 4` and `--export_quantization_dataset data/wiki_demo.txt` to quantize the model.
+> Use `--export_quantization_bit 4` and `--export_quantization_dataset data/c4_demo.json` to quantize the model.
 
 ### API Demo
 
@@ -482,7 +482,7 @@ python src/export_model.py \
 > 尚不支持量化模型的 LoRA 权重合并及导出。
 
 > [!TIP]
-> 使用 `--export_quantization_bit 4` 和 `--export_quantization_dataset data/wiki_demo.txt` 量化导出模型。
+> 使用 `--export_quantization_bit 4` 和 `--export_quantization_dataset data/c4_demo.json` 量化导出模型。
 
 ### API 服务
 
File diff suppressed because one or more lines are too long
@@ -239,6 +239,13 @@
       "prompt": "text"
     }
   },
+  "c4_demo": {
+    "file_name": "c4_demo.json",
+    "file_sha1": "a5a0c86759732f9a5238e447fecd74f28a66cca8",
+    "columns": {
+      "prompt": "text"
+    }
+  },
   "refinedweb": {
     "hf_hub_url": "tiiuae/falcon-refinedweb",
     "columns": {
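The new `c4_demo` entry registers a local JSON file whose `text` field is mapped to the `prompt` column; this is the corpus the GPTQ calibration step samples from. A minimal sketch of how such a file can be inspected (the record layout is assumed from the column mapping, not copied from the actual file):

```python
# Sketch: what the "columns": {"prompt": "text"} mapping implies for data/c4_demo.json.
# Assumption: the file is a JSON array of records, each carrying a "text" field.
import json

with open("data/c4_demo.json", "r", encoding="utf-8") as f:
    samples = json.load(f)

calibration_texts = [sample["text"] for sample in samples]
print("loaded {} calibration passages".format(len(calibration_texts)))
```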
@@ -26,6 +26,8 @@ LOG_FILE_NAME = "trainer_log.jsonl"
 
 METHODS = ["full", "freeze", "lora"]
 
+PEFT_METHODS = ["lora"]
+
 SUBJECTS = ["Average", "STEM", "Social Sciences", "Humanities", "Other"]
 
 SUPPORTED_MODELS = OrderedDict()
@@ -1,13 +1,9 @@
 import gc
-import os
-import sys
 import torch
-from typing import TYPE_CHECKING, Any, Dict, Optional, Tuple
+from typing import TYPE_CHECKING, Tuple
 from transformers import InfNanRemoveLogitsProcessor, LogitsProcessorList
 
-from llmtuner.extras.logging import get_logger
-logger = get_logger(__name__)
 
 try:
     from transformers.utils import (
         is_torch_bf16_cpu_available,
@@ -106,22 +102,6 @@ def infer_optim_dtype(model_dtype: torch.dtype) -> torch.dtype:
         return torch.float32
 
 
-def parse_args(parser: "HfArgumentParser", args: Optional[Dict[str, Any]] = None) -> Tuple[Any]:
-    if args is not None:
-        return parser.parse_dict(args)
-    elif len(sys.argv) == 2 and sys.argv[1].endswith(".yaml"):
-        return parser.parse_yaml_file(os.path.abspath(sys.argv[1]))
-    elif len(sys.argv) == 2 and sys.argv[1].endswith(".json"):
-        return parser.parse_json_file(os.path.abspath(sys.argv[1]))
-    else:
-        (*parsed_args, unknown_args) = parser.parse_args_into_dataclasses(return_remaining_strings=True)
-        if unknown_args:
-            logger.warning(parser.format_help())
-            logger.error(f'\nGot unknown args, potentially deprecated arguments: {unknown_args}\n')
-            raise ValueError(f"Some specified arguments are not used by the HfArgumentParser: {unknown_args}")
-        return (*parsed_args,)
-
-
 def torch_gc() -> None:
     r"""
     Collects GPU memory.
@@ -1,5 +1,7 @@
+import os
+import sys
 import torch
 import logging
 import datasets
 import transformers
 from typing import Any, Dict, Optional, Tuple
@@ -7,7 +9,6 @@ from transformers import HfArgumentParser, Seq2SeqTrainingArguments
 from transformers.trainer_utils import get_last_checkpoint
 
 from llmtuner.extras.logging import get_logger
-from llmtuner.extras.misc import parse_args
 from llmtuner.hparams import (
     ModelArguments,
     DataArguments,
@@ -40,6 +41,33 @@ _EVAL_CLS = Tuple[
 ]
 
 
+def _parse_args(parser: "HfArgumentParser", args: Optional[Dict[str, Any]] = None) -> Tuple[Any]:
+    if args is not None:
+        return parser.parse_dict(args)
+
+    if len(sys.argv) == 2 and sys.argv[1].endswith(".yaml"):
+        return parser.parse_yaml_file(os.path.abspath(sys.argv[1]))
+
+    if len(sys.argv) == 2 and sys.argv[1].endswith(".json"):
+        return parser.parse_json_file(os.path.abspath(sys.argv[1]))
+
+    (*parsed_args, unknown_args) = parser.parse_args_into_dataclasses(return_remaining_strings=True)
+
+    if unknown_args:
+        print(parser.format_help())
+        print("Got unknown args, potentially deprecated arguments: {}".format(unknown_args))
+        raise ValueError("Some specified arguments are not used by the HfArgumentParser: {}".format(unknown_args))
+
+    return (*parsed_args,)
+
+
+def _set_transformers_logging(log_level: Optional[int] = logging.INFO) -> None:
+    datasets.utils.logging.set_verbosity(log_level)
+    transformers.utils.logging.set_verbosity(log_level)
+    transformers.utils.logging.enable_default_handler()
+    transformers.utils.logging.enable_explicit_format()
+
+
 def _verify_model_args(model_args: "ModelArguments", finetuning_args: "FinetuningArguments") -> None:
     if model_args.quantization_bit is not None:
         if finetuning_args.finetuning_type != "lora":
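The relocated `_parse_args` resolves arguments from, in order of precedence, an explicit dict, a single `*.yaml`/`*.json` path on the command line, or ordinary CLI flags. A self-contained sketch of that behaviour using `HfArgumentParser` directly; the `DemoArguments` dataclass is made up for illustration and is not part of the repository:

```python
# Sketch of the resolution order implemented by _parse_args, on a made-up dataclass.
import sys
from dataclasses import dataclass, field
from transformers import HfArgumentParser


@dataclass
class DemoArguments:
    model_name_or_path: str = field(default="gpt2")
    export_quantization_bit: int = field(default=4)


parser = HfArgumentParser(DemoArguments)

# 1) an explicit dict (this is what the Web UI passes through get_*_args)
(from_dict,) = parser.parse_dict({"model_name_or_path": "gpt2", "export_quantization_bit": 8})

# 2) a single *.yaml or *.json argument would be routed to
#    parser.parse_yaml_file(...) / parser.parse_json_file(...) instead

# 3) otherwise fall back to regular CLI flags, rejecting unknown ones
(from_cli, unknown) = parser.parse_args_into_dataclasses(args=sys.argv[1:], return_remaining_strings=True)
if unknown:
    raise ValueError("Some specified arguments are not used by the HfArgumentParser: {}".format(unknown))

print(from_dict, from_cli)
```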
@@ -56,34 +84,28 @@ def _verify_model_args(model_args: "ModelArguments", finetuning_args: "FinetuningArguments") -> None:
             raise ValueError("Quantized model only accepts a single adapter. Merge them first.")
 
 
-def parse_train_args(args: Optional[Dict[str, Any]] = None) -> _TRAIN_CLS:
+def _parse_train_args(args: Optional[Dict[str, Any]] = None) -> _TRAIN_CLS:
     parser = HfArgumentParser(_TRAIN_ARGS)
-    return parse_args(parser, args)
+    return _parse_args(parser, args)
 
 
-def parse_infer_args(args: Optional[Dict[str, Any]] = None) -> _INFER_CLS:
+def _parse_infer_args(args: Optional[Dict[str, Any]] = None) -> _INFER_CLS:
     parser = HfArgumentParser(_INFER_ARGS)
-    return parse_args(parser, args)
+    return _parse_args(parser, args)
 
 
-def parse_eval_args(args: Optional[Dict[str, Any]] = None) -> _EVAL_CLS:
+def _parse_eval_args(args: Optional[Dict[str, Any]] = None) -> _EVAL_CLS:
     parser = HfArgumentParser(_EVAL_ARGS)
-    return parse_args(parser, args)
+    return _parse_args(parser, args)
 
 
 def get_train_args(args: Optional[Dict[str, Any]] = None) -> _TRAIN_CLS:
-    model_args, data_args, training_args, finetuning_args, generating_args = parse_train_args(args)
+    model_args, data_args, training_args, finetuning_args, generating_args = _parse_train_args(args)
 
     # Setup logging
     if training_args.should_log:
-        # The default of training_args.log_level is passive, so we set log level at info here to have that default.
-        transformers.utils.logging.set_verbosity_info()
-
-    log_level = training_args.get_process_log_level()
-    datasets.utils.logging.set_verbosity(log_level)
-    transformers.utils.logging.set_verbosity(log_level)
-    transformers.utils.logging.enable_default_handler()
-    transformers.utils.logging.enable_explicit_format()
+        log_level = training_args.get_process_log_level()
+        _set_transformers_logging(log_level)
 
     # Check arguments
     data_args.init_for_training(training_args.seed)
@@ -193,7 +215,8 @@ def get_train_args(args: Optional[Dict[str, Any]] = None) -> _TRAIN_CLS:
 
 
 def get_infer_args(args: Optional[Dict[str, Any]] = None) -> _INFER_CLS:
-    model_args, data_args, finetuning_args, generating_args = parse_infer_args(args)
+    model_args, data_args, finetuning_args, generating_args = _parse_infer_args(args)
+    _set_transformers_logging()
 
     if data_args.template is None:
         raise ValueError("Please specify which `template` to use.")
@@ -204,7 +227,8 @@ def get_infer_args(args: Optional[Dict[str, Any]] = None) -> _INFER_CLS:
 
 
 def get_eval_args(args: Optional[Dict[str, Any]] = None) -> _EVAL_CLS:
-    model_args, data_args, eval_args, finetuning_args = parse_eval_args(args)
+    model_args, data_args, eval_args, finetuning_args = _parse_eval_args(args)
+    _set_transformers_logging()
 
     if data_args.template is None:
         raise ValueError("Please specify which `template` to use.")
@@ -76,8 +76,13 @@ def configure_quantization(
     if finetuning_args.export_quantization_bit is not None: # gptq
         require_version("optimum>=1.16.0", "To fix: pip install optimum>=1.16.0")
         require_version("auto_gptq>=0.5.0", "To fix: pip install auto_gptq>=0.5.0")
+
+        if getattr(config, "model_type", None) == "chatglm":
+            raise ValueError("ChatGLM model is not supported.")
+
         config_kwargs["quantization_config"] = GPTQConfig(
             bits=finetuning_args.export_quantization_bit,
             tokenizer=tokenizer,
             dataset=get_quantization_dataset(tokenizer, model_args, finetuning_args)
         )
+        config_kwargs["device_map"] = "auto"
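For reference, the `quantization_config` built here is what makes `from_pretrained` run an AutoGPTQ calibration pass through the optimum integration in Transformers. A standalone sketch of the same mechanism; the model name and calibration texts are placeholders, not values used by the repository:

```python
# Sketch: GPTQConfig-driven quantization, mirroring configure_quantization above.
# Placeholders: model id and calibration strings. Requires optimum, auto-gptq and a GPU.
from transformers import AutoModelForCausalLM, AutoTokenizer, GPTQConfig

model_id = "facebook/opt-125m"  # placeholder base model
tokenizer = AutoTokenizer.from_pretrained(model_id)

gptq_config = GPTQConfig(
    bits=4,                                                           # plays the role of export_quantization_bit
    tokenizer=tokenizer,
    dataset=["calibration passage one", "calibration passage two"],   # plays the role of the c4_demo samples
)

model = AutoModelForCausalLM.from_pretrained(
    model_id,
    quantization_config=gptq_config,  # triggers the GPTQ calibration pass
    device_map="auto",                # calibration runs on the accelerator
)
model.save_pretrained("opt-125m-gptq-4bit")
tokenizer.save_pretrained("opt-125m-gptq-4bit")
```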
@@ -7,6 +7,7 @@ from peft.utils import WEIGHTS_NAME, SAFETENSORS_WEIGHTS_NAME
 from llmtuner.extras.constants import (
     DEFAULT_MODULE,
     DEFAULT_TEMPLATE,
+    PEFT_METHODS,
     SUPPORTED_MODELS,
     TRAINING_STAGES,
     DownloadSource
@@ -77,8 +78,11 @@ def get_template(model_name: str) -> str:
 
 
 def list_adapters(model_name: str, finetuning_type: str) -> Dict[str, Any]:
+    if finetuning_type not in PEFT_METHODS:
+        return gr.update(value=[], choices=[], interactive=False)
+
     adapters = []
-    if model_name and finetuning_type == "lora": # full and freeze have no adapter
+    if model_name and finetuning_type == "lora":
         save_dir = get_save_dir(model_name, finetuning_type)
         if save_dir and os.path.isdir(save_dir):
             for adapter in os.listdir(save_dir):
@@ -87,7 +91,7 @@ def list_adapters(model_name: str, finetuning_type: str) -> Dict[str, Any]:
                     and any([os.path.isfile(os.path.join(save_dir, adapter, name)) for name in ADAPTER_NAMES])
                 ):
                     adapters.append(adapter)
-    return gr.update(value=[], choices=adapters)
+    return gr.update(value=[], choices=adapters, interactive=True)
 
 
 def load_dataset_info(dataset_dir: str) -> Dict[str, Dict[str, Any]]:
@@ -21,8 +21,11 @@ def next_page(page_index: int, total_num: int) -> int:
 
 
 def can_preview(dataset_dir: str, dataset: list) -> Dict[str, Any]:
-    with open(os.path.join(dataset_dir, DATA_CONFIG), "r", encoding="utf-8") as f:
-        dataset_info = json.load(f)
+    try:
+        with open(os.path.join(dataset_dir, DATA_CONFIG), "r", encoding="utf-8") as f:
+            dataset_info = json.load(f)
+    except:
+        return gr.update(interactive=False)
 
     if (
         len(dataset) > 0
@@ -10,6 +10,9 @@ if TYPE_CHECKING:
     from llmtuner.webui.engine import Engine
 
 
+GPTQ_BITS = ["8", "4", "3", "2"]
+
+
 def save_model(
     lang: str,
     model_name: str,
@@ -18,6 +21,8 @@ def save_model(
     finetuning_type: str,
     template: str,
     max_shard_size: int,
+    export_quantization_bit: int,
+    export_quantization_dataset: str,
     export_dir: str
 ) -> Generator[str, None, None]:
     error = ""
@@ -25,23 +30,32 @@ def save_model(
         error = ALERTS["err_no_model"][lang]
     elif not model_path:
         error = ALERTS["err_no_path"][lang]
-    elif not adapter_path:
-        error = ALERTS["err_no_adapter"][lang]
     elif not export_dir:
         error = ALERTS["err_no_export_dir"][lang]
+    elif export_quantization_bit in GPTQ_BITS and not export_quantization_dataset:
+        error = ALERTS["err_no_dataset"][lang]
+    elif export_quantization_bit not in GPTQ_BITS and not adapter_path:
+        error = ALERTS["err_no_adapter"][lang]
 
     if error:
         gr.Warning(error)
         yield error
         return
 
+    if adapter_path:
+        adapter_name_or_path = ",".join([get_save_dir(model_name, finetuning_type, adapter) for adapter in adapter_path])
+    else:
+        adapter_name_or_path = None
+
     args = dict(
         model_name_or_path=model_path,
-        adapter_name_or_path=",".join([get_save_dir(model_name, finetuning_type, adapter) for adapter in adapter_path]),
+        adapter_name_or_path=adapter_name_or_path,
         finetuning_type=finetuning_type,
         template=template,
         export_dir=export_dir,
-        export_size=max_shard_size
+        export_size=max_shard_size,
+        export_quantization_bit=int(export_quantization_bit) if export_quantization_bit in GPTQ_BITS else None,
+        export_quantization_dataset=export_quantization_dataset
     )
 
     yield ALERTS["info_exporting"][lang]
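The `args` dict assembled in `save_model` uses the same keywords as the CLI export. A minimal sketch of the equivalent call outside the Web UI; the paths are placeholders, and it assumes the package-level `export_model` helper consumes this dict the same way:

```python
# Sketch (assumptions: placeholder paths; llmtuner's export_model accepts the
# same keyword dict that save_model builds above).
from llmtuner import export_model

export_model(dict(
    model_name_or_path="path/to/base_model",          # placeholder
    adapter_name_or_path=None,                        # no adapter for a pure GPTQ export
    finetuning_type="lora",
    template="default",
    export_dir="path/to/exported_model",              # placeholder
    export_size=1,                                    # max shard size in GB
    export_quantization_bit=4,                        # "none" in the UI maps to None here
    export_quantization_dataset="data/c4_demo.json",
))
```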
@@ -51,9 +65,11 @@ def save_model(
 
 def create_export_tab(engine: "Engine") -> Dict[str, "Component"]:
     with gr.Row():
-        export_dir = gr.Textbox()
         max_shard_size = gr.Slider(value=1, minimum=1, maximum=100)
+        export_quantization_bit = gr.Dropdown(choices=["none", "8", "4", "3", "2"], value="none")
+        export_quantization_dataset = gr.Textbox(value="data/c4_demo.json")
 
+    export_dir = gr.Textbox()
     export_btn = gr.Button()
     info_box = gr.Textbox(show_label=False, interactive=False)
 
@@ -67,14 +83,18 @@ def create_export_tab(engine: "Engine") -> Dict[str, "Component"]:
             engine.manager.get_elem_by_name("top.finetuning_type"),
             engine.manager.get_elem_by_name("top.template"),
             max_shard_size,
+            export_quantization_bit,
+            export_quantization_dataset,
             export_dir
         ],
         [info_box]
     )
 
     return dict(
-        export_dir=export_dir,
         max_shard_size=max_shard_size,
+        export_quantization_bit=export_quantization_bit,
+        export_quantization_dataset=export_quantization_dataset,
+        export_dir=export_dir,
         export_btn=export_btn,
         info_box=info_box
     )
@@ -20,7 +20,7 @@ def create_top() -> Dict[str, "Component"]:
 
     with gr.Row():
         finetuning_type = gr.Dropdown(choices=METHODS, value="lora", scale=1)
-        adapter_path = gr.Dropdown(multiselect=True, scale=5)
+        adapter_path = gr.Dropdown(multiselect=True, scale=5, allow_custom_value=True)
         refresh_btn = gr.Button(scale=1)
 
     with gr.Accordion(label="Advanced config", open=False) as advanced_tab:
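`allow_custom_value=True` lets the adapter dropdown (and the reward model dropdown in the next hunk) accept a value typed by the user instead of only the refreshed choices. A tiny isolated sketch of that Gradio behaviour; the labels and choices are made up:

```python
# Sketch: a dropdown that accepts values outside its preset choices (made-up labels).
import gradio as gr

with gr.Blocks() as demo:
    adapter_path = gr.Dropdown(
        choices=["adapter_a", "adapter_b"],
        multiselect=True,
        allow_custom_value=True,  # users may type an arbitrary adapter folder name
        label="Adapter path",
    )

demo.launch()
```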
@@ -94,7 +94,7 @@ def create_train_tab(engine: "Engine") -> Dict[str, "Component"]:
     with gr.Accordion(label="RLHF config", open=False) as rlhf_tab:
         with gr.Row():
             dpo_beta = gr.Slider(value=0.1, minimum=0, maximum=1, step=0.01, scale=1)
-            reward_model = gr.Dropdown(scale=3)
+            reward_model = gr.Dropdown(scale=3, allow_custom_value=True)
             refresh_btn = gr.Button(scale=1)
 
     refresh_btn.click(
@@ -432,11 +432,11 @@ LOCALES = {
     "reward_model": {
         "en": {
             "label": "Reward model",
-            "info": "Checkpoint of the reward model for PPO training. (Needs to refresh checkpoints)"
+            "info": "Adapter of the reward model for PPO training. (Needs to refresh adapters)"
         },
         "zh": {
            "label": "奖励模型",
-            "info": "PPO 训练中奖励模型的断点路径。(需要刷新断点)"
+            "info": "PPO 训练中奖励模型的适配器路径。(需要刷新适配器)"
         }
     },
     "cmd_preview_btn": {
@@ -585,6 +585,36 @@ LOCALES = {
             "label": "温度系数"
         }
     },
+    "max_shard_size": {
+        "en": {
+            "label": "Max shard size (GB)",
+            "info": "The maximum size for a model file."
+        },
+        "zh": {
+            "label": "最大分块大小(GB)",
+            "info": "单个模型文件的最大大小。"
+        }
+    },
+    "export_quantization_bit": {
+        "en": {
+            "label": "Export quantization bit.",
+            "info": "Quantizing the exported model."
+        },
+        "zh": {
+            "label": "导出量化等级",
+            "info": "量化导出模型。"
+        }
+    },
+    "export_quantization_dataset": {
+        "en": {
+            "label": "Export quantization dataset.",
+            "info": "The calibration dataset used for quantization."
+        },
+        "zh": {
+            "label": "导出量化数据集",
+            "info": "量化过程中使用的校准数据集。"
+        }
+    },
     "export_dir": {
         "en": {
             "label": "Export dir",
@@ -595,16 +625,6 @@ LOCALES = {
             "info": "保存导出模型的文件夹路径。"
         }
     },
-    "max_shard_size": {
-        "en": {
-            "label": "Max shard size (GB)",
-            "info": "The maximum size for a model file."
-        },
-        "zh": {
-            "label": "最大分块大小(GB)",
-            "info": "模型文件的最大大小。"
-        }
-    },
     "export_btn": {
         "en": {
             "value": "Export"