From fa5ab21ebc0ab738178c0c57578db3bda995ae06 Mon Sep 17 00:00:00 2001
From: hiyouga
Date: Thu, 29 Feb 2024 00:34:19 +0800
Subject: [PATCH] release v0.5.3

---
 README.md                              |  6 +-
 README_zh.md                           |  8 +--
 src/llmtuner/__init__.py               |  2 +-
 src/llmtuner/hparams/parser.py         |  3 -
 src/llmtuner/model/loader.py           |  2 +-
 src/llmtuner/model/patcher.py          | 18 ++++--
 src/llmtuner/webui/components/train.py | 30 +++++----
 src/llmtuner/webui/locales.py          | 89 ++++++++++++++++++--------
 src/llmtuner/webui/runner.py           | 14 ++--
 src/llmtuner/webui/utils.py            | 11 ++--
 10 files changed, 116 insertions(+), 67 deletions(-)

diff --git a/README.md b/README.md
index 23f7a3ed..56939492 100644
--- a/README.md
+++ b/README.md
@@ -42,9 +42,9 @@ https://github.com/hiyouga/LLaMA-Factory/assets/16256802/6ba60acc-e2e2-4bec-b846
 
 - **Various models**: LLaMA, Mistral, Mixtral-MoE, Qwen, Yi, Gemma, Baichuan, ChatGLM, Phi, etc.
 - **Integrated methods**: (Continuous) pre-training, supervised fine-tuning, reward modeling, PPO and DPO.
-- **Scalable resources**: 32-bit full-tuning, 16-bit freeze tuning, 16-bit LoRA tuning, 2/4/8-bit QLoRA with AQLM/AWQ/GPTQ/LLM.int8.
+- **Scalable resources**: 32-bit full-tuning, 16-bit freeze-tuning, 16-bit LoRA, 2/4/8-bit QLoRA via AQLM/AWQ/GPTQ/LLM.int8.
 - **Advanced algorithms**: DoRA, LongLoRA, LLaMA Pro, LoftQ, agent tuning.
-- **Intriguing tricks**: FlashAttention-2, Unsloth, RoPE scaling, NEFTune, rsLoRA.
+- **Practical tricks**: FlashAttention-2, Unsloth, RoPE scaling, NEFTune, rsLoRA.
 - **Experiment monitors**: LlamaBoard, TensorBoard, Wandb, MLflow, etc.
 
 ## Benchmark
@@ -140,7 +140,7 @@ Please refer to [constants.py](src/llmtuner/extras/constants.py) for a full list
 
 ## Supported Training Approaches
 
-| Approach               | Full-parameter     | Partial-parameter  | LoRA               | QLoRA              |
+| Approach               | Full-tuning        | Freeze-tuning      | LoRA               | QLoRA              |
 | ---------------------- | ------------------ | ------------------ | ------------------ | ------------------ |
 | Pre-Training           | :white_check_mark: | :white_check_mark: | :white_check_mark: | :white_check_mark: |
 | Supervised Fine-Tuning | :white_check_mark: | :white_check_mark: | :white_check_mark: | :white_check_mark: |
diff --git a/README_zh.md b/README_zh.md
index 7235321a..ca20650c 100644
--- a/README_zh.md
+++ b/README_zh.md
@@ -41,10 +41,10 @@ https://github.com/hiyouga/LLaMA-Factory/assets/16256802/6ba60acc-e2e2-4bec-b846
 ## 项目特色
 
 - **多种模型**:LLaMA、Mistral、Mixtral-MoE、Qwen、Yi、Gemma、Baichuan、ChatGLM、Phi 等等。
-- **集成方法**:(增量)预训练、指令监督微调、奖励模型训练、PPO 训练、DPO 训练。
-- **多种精度**:32 比特全参数训练、16 比特部分参数训练、16比特 LoRA 训练、基于 AQLM/AWQ/GPTQ/LLM.int8 的 2/4/8 比特 LoRA 训练。
-- **先进算法**: DoRA、LongLoRA、LLaMA Pro、LoftQ、agent tuning。
-- **新鲜技巧**:FlashAttention-2、Unsloth、RoPE scaling、NEFTune、rsLoRA。
+- **集成方法**:(增量)预训练、指令监督微调、奖励模型训练、PPO 训练和 DPO 训练。
+- **多种精度**:32 比特全参数微调、16 比特冻结微调、16 比特 LoRA 微调和基于 AQLM/AWQ/GPTQ/LLM.int8 的 2/4/8 比特 QLoRA 微调。
+- **先进算法**:DoRA、LongLoRA、LLaMA Pro、LoftQ 和 Agent 微调。
+- **实用技巧**:FlashAttention-2、Unsloth、RoPE scaling、NEFTune 和 rsLoRA。
 - **实验监控**:LlamaBoard、TensorBoard、Wandb、MLflow 等等。
 
 ## 性能指标
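
The "Scalable resources" bullet above compresses several loading paths into one line. For reference, here is a minimal sketch of the 4-bit QLoRA path using bitsandbytes and PEFT; the model name and hyperparameter values are illustrative placeholders, not LLaMA-Factory defaults.

    import torch
    from peft import LoraConfig, get_peft_model
    from transformers import AutoModelForCausalLM, BitsAndBytesConfig

    # Quantize the frozen base weights to 4-bit NF4, keep compute in bfloat16.
    bnb_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype=torch.bfloat16,
    )
    model = AutoModelForCausalLM.from_pretrained(
        "meta-llama/Llama-2-7b-hf",  # placeholder model
        quantization_config=bnb_config,
        device_map="auto",
    )
    # Only the low-rank adapter on top of the 4-bit weights is trained.
    model = get_peft_model(model, LoraConfig(r=8, lora_alpha=16, task_type="CAUSAL_LM"))
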
diff --git a/src/llmtuner/__init__.py b/src/llmtuner/__init__.py
index 115fdb27..027f9ed7 100644
--- a/src/llmtuner/__init__.py
+++ b/src/llmtuner/__init__.py
@@ -7,5 +7,5 @@
 from .train import export_model, run_exp
 from .webui import create_ui, create_web_demo
 
-__version__ = "0.5.2"
+__version__ = "0.5.3"
 __all__ = ["create_app", "ChatModel", "Evaluator", "export_model", "run_exp", "create_ui", "create_web_demo"]
diff --git a/src/llmtuner/hparams/parser.py b/src/llmtuner/hparams/parser.py
index 16e6c2c6..4a541a22 100644
--- a/src/llmtuner/hparams/parser.py
+++ b/src/llmtuner/hparams/parser.py
@@ -3,7 +3,6 @@ import os
 import sys
 from typing import Any, Dict, Optional, Tuple
 
-import datasets
 import torch
 import transformers
 from transformers import HfArgumentParser, Seq2SeqTrainingArguments
@@ -62,7 +61,6 @@ def _parse_args(parser: "HfArgumentParser", args: Optional[Dict[str, Any]] = Non
 
 
 def _set_transformers_logging(log_level: Optional[int] = logging.INFO) -> None:
-    datasets.utils.logging.set_verbosity(log_level)
     transformers.utils.logging.set_verbosity(log_level)
     transformers.utils.logging.enable_default_handler()
     transformers.utils.logging.enable_explicit_format()
@@ -243,7 +241,6 @@ def get_train_args(args: Optional[Dict[str, Any]] = None) -> _TRAIN_CLS:
             str(model_args.compute_dtype),
         )
     )
-    logger.info(f"Training/evaluation parameters {training_args}")
 
     transformers.set_seed(training_args.seed)
 
diff --git a/src/llmtuner/model/loader.py b/src/llmtuner/model/loader.py
index cf3fb787..29d213a7 100644
--- a/src/llmtuner/model/loader.py
+++ b/src/llmtuner/model/loader.py
@@ -51,7 +51,7 @@ def load_model_and_tokenizer(
     patch_tokenizer(tokenizer)
 
     config = AutoConfig.from_pretrained(model_args.model_name_or_path, **config_kwargs)
-    patch_config(config, tokenizer, model_args, finetuning_args, config_kwargs, is_trainable)
+    patch_config(config, tokenizer, model_args, config_kwargs, is_trainable)
 
     model = None
     if is_trainable and model_args.use_unsloth:
diff --git a/src/llmtuner/model/patcher.py b/src/llmtuner/model/patcher.py
index 2eba15b6..87127480 100644
--- a/src/llmtuner/model/patcher.py
+++ b/src/llmtuner/model/patcher.py
@@ -24,7 +24,7 @@ if TYPE_CHECKING:
     from transformers import PretrainedConfig, PreTrainedTokenizer
     from trl import AutoModelForCausalLMWithValueHead
 
-    from ..hparams import FinetuningArguments, ModelArguments
+    from ..hparams import ModelArguments
 
 
 logger = get_logger(__name__)
@@ -157,7 +157,7 @@ def _configure_quantization(
     config_kwargs: Dict[str, Any],
 ) -> None:
     r"""
-    Priority: GPTQ-quantized (training) > AutoGPTQ (export) > Bitsandbytes (training)
+    Priority: PTQ-quantized (training) > AutoGPTQ (export) > Bitsandbytes (training)
     """
     if getattr(config, "quantization_config", None):  # gptq
         if is_deepspeed_zero3_enabled():
@@ -167,7 +167,15 @@ def _configure_quantization(
         quantization_config: Dict[str, Any] = getattr(config, "quantization_config", None)
         if quantization_config.get("quant_method", None) == "gptq" and quantization_config.get("bits", -1) == 4:
             quantization_config["use_exllama"] = False  # disable exllama
-        logger.info("Loading {}-bit GPTQ-quantized model.".format(quantization_config.get("bits", -1)))
+
+        if quantization_config.get("quant_method", None) == "aqlm":
+            quantization_config["bits"] = 2
+
+        logger.info(
+            "Loading {}-bit {}-quantized model.".format(
+                quantization_config.get("bits", "?"), quantization_config.get("quant_method", None)
+            )
+        )
 
     elif model_args.export_quantization_bit is not None:  # auto-gptq
         require_version("optimum>=1.16.0", "To fix: pip install optimum>=1.16.0")
@@ -253,7 +261,6 @@ def patch_config(
     config: "PretrainedConfig",
     tokenizer: "PreTrainedTokenizer",
     model_args: "ModelArguments",
-    finetuning_args: "FinetuningArguments",
     config_kwargs: Dict[str, Any],
     is_trainable: bool,
 ) -> None:
@@ -274,9 +281,6 @@ def patch_config(
 
     _configure_quantization(config, tokenizer, model_args, config_kwargs)
 
-    if finetuning_args.use_dora:
-        config_kwargs["device_map"] = {"": get_current_device()}
-
 
 def patch_model(
     model: "PreTrainedModel", tokenizer: "PreTrainedTokenizer", model_args: "ModelArguments", is_trainable: bool
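
The _configure_quantization hunk above generalizes the old GPTQ-only log message: 4-bit GPTQ checkpoints still get the exllama kernels disabled for training, and AQLM checkpoints, whose configs carry no bit width, are now reported as 2-bit. A condensed restatement of that logic as a standalone helper (the function name is hypothetical):

    from typing import Any, Dict

    def describe_quantization(quantization_config: Dict[str, Any]) -> str:
        # 4-bit GPTQ checkpoints are trainable only with the exllama kernels off.
        if quantization_config.get("quant_method") == "gptq" and quantization_config.get("bits", -1) == 4:
            quantization_config["use_exllama"] = False
        # AQLM configs omit "bits", so the patch fills in 2 before logging.
        if quantization_config.get("quant_method") == "aqlm":
            quantization_config["bits"] = 2
        return "{}-bit {}-quantized model".format(
            quantization_config.get("bits", "?"), quantization_config.get("quant_method")
        )
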
"PreTrainedModel", tokenizer: "PreTrainedTokenizer", model_args: "ModelArguments", is_trainable: bool diff --git a/src/llmtuner/webui/components/train.py b/src/llmtuner/webui/components/train.py index 4865e2df..bfec4c4b 100644 --- a/src/llmtuner/webui/components/train.py +++ b/src/llmtuner/webui/components/train.py @@ -34,7 +34,7 @@ def create_train_tab(engine: "Engine") -> Dict[str, "Component"]: elem_dict.update(dict(training_stage=training_stage, dataset_dir=dataset_dir, dataset=dataset, **preview_elems)) with gr.Row(): - cutoff_len = gr.Slider(value=1024, minimum=4, maximum=8192, step=1) + cutoff_len = gr.Slider(value=1024, minimum=4, maximum=16384, step=1) learning_rate = gr.Textbox(value="5e-5") num_train_epochs = gr.Textbox(value="3.0") max_samples = gr.Textbox(value="100000") @@ -52,8 +52,8 @@ def create_train_tab(engine: "Engine") -> Dict[str, "Component"]: ) with gr.Row(): - batch_size = gr.Slider(value=4, minimum=1, maximum=1024, step=1) - gradient_accumulation_steps = gr.Slider(value=4, minimum=1, maximum=1024, step=1) + batch_size = gr.Slider(value=2, minimum=1, maximum=1024, step=1) + gradient_accumulation_steps = gr.Slider(value=8, minimum=1, maximum=1024, step=1) lr_scheduler_type = gr.Dropdown(choices=[scheduler.value for scheduler in SchedulerType], value="cosine") max_grad_norm = gr.Textbox(value="1.0") val_size = gr.Slider(value=0, minimum=0, maximum=1, step=0.001) @@ -122,25 +122,31 @@ def create_train_tab(engine: "Engine") -> Dict[str, "Component"]: with gr.Accordion(label="LoRA config", open=False) as lora_tab: with gr.Row(): - lora_rank = gr.Slider(value=8, minimum=1, maximum=1024, step=1) - lora_dropout = gr.Slider(value=0.1, minimum=0, maximum=1, step=0.01) - lora_target = gr.Textbox() - additional_target = gr.Textbox() + lora_rank = gr.Slider(value=8, minimum=1, maximum=1024, step=1, scale=1) + lora_alpha = gr.Slider(value=16, minimum=1, maximum=2048, step=0.1, scale=1) + lora_dropout = gr.Slider(value=0.1, minimum=0, maximum=1, step=0.01, scale=1) + lora_target = gr.Textbox(scale=2) - with gr.Column(): - use_rslora = gr.Checkbox() - create_new_adapter = gr.Checkbox() + with gr.Row(): + use_rslora = gr.Checkbox(scale=1) + use_dora = gr.Checkbox(scale=1) + create_new_adapter = gr.Checkbox(scale=1) + additional_target = gr.Textbox(scale=2) - input_elems.update({lora_rank, lora_dropout, lora_target, additional_target, use_rslora, create_new_adapter}) + input_elems.update( + {lora_rank, lora_alpha, lora_dropout, lora_target, use_rslora, use_dora, create_new_adapter, additional_target} + ) elem_dict.update( dict( lora_tab=lora_tab, lora_rank=lora_rank, + lora_alpha=lora_alpha, lora_dropout=lora_dropout, lora_target=lora_target, - additional_target=additional_target, use_rslora=use_rslora, + use_dora=use_dora, create_new_adapter=create_new_adapter, + additional_target=additional_target, ) ) diff --git a/src/llmtuner/webui/locales.py b/src/llmtuner/webui/locales.py index cc7f3e6c..6ad5fc7c 100644 --- a/src/llmtuner/webui/locales.py +++ b/src/llmtuner/webui/locales.py @@ -572,6 +572,20 @@ LOCALES = { "info": "LoRA 矩阵的秩。", }, }, + "lora_alpha": { + "en": { + "label": "LoRA Alpha", + "info": "Lora scaling coefficient.", + }, + "ru": { + "label": "LoRA Alpha", + "info": "Коэффициент масштабирования LoRA.", + }, + "zh": { + "label": "LoRA 缩放系数", + "info": "LoRA 缩放系数大小。", + }, + }, "lora_dropout": { "en": { "label": "LoRA Dropout", @@ -600,6 +614,48 @@ LOCALES = { "info": "应用 LoRA 的目标模块名称。使用英文逗号分隔多个名称。", }, }, + "use_rslora": { + "en": { + "label": "Use rslora", + "info": 
"Use the rank stabilization scaling factor for LoRA layer.", + }, + "ru": { + "label": "Использовать rslora", + "info": "Использовать коэффициент масштабирования стабилизации ранга для слоя LoRA.", + }, + "zh": { + "label": "使用 rslora", + "info": "对 LoRA 层使用秩稳定缩放方法。", + }, + }, + "use_dora": { + "en": { + "label": "Use DoRA", + "info": "Use weight-decomposed LoRA.", + }, + "ru": { + "label": "Используйте DoRA", + "info": "Используйте LoRA с декомпозицией весов.", + }, + "zh": { + "label": "使用 DoRA", + "info": "使用权重分解的 LoRA。", + }, + }, + "create_new_adapter": { + "en": { + "label": "Create new adapter", + "info": "Create a new adapter with randomly initialized weight upon the existing one.", + }, + "ru": { + "label": "Создать новый адаптер", + "info": "Создать новый адаптер с случайной инициализацией веса на основе существующего.", + }, + "zh": { + "label": "新建适配器", + "info": "在现有的适配器上创建一个随机初始化后的新适配器。", + }, + }, "additional_target": { "en": { "label": "Additional modules (optional)", @@ -617,34 +673,6 @@ LOCALES = { "info": "除 LoRA 层以外的可训练模块名称。使用英文逗号分隔多个名称。", }, }, - "use_rslora": { - "en": { - "label": "Use rslora", - "info": "Use the rank stabilization scaling factor for LoRA layer.", - }, - "ru": { - "label": "Использовать rslora", - "info": "Использовать коэффициент масштабирования стабилизации ранга для слоя LoRA.", - }, - "zh": { - "label": "使用 rslora", - "info": "对 LoRA 层使用秩稳定缩放方法。", - }, - }, - "create_new_adapter": { - "en": { - "label": "Create new adapter", - "info": "Create a new adapter with randomly initialized weight upon the existing one.", - }, - "ru": { - "label": "Создать новый адаптер", - "info": "Создать новый адаптер с случайной инициализацией веса на основе существующего.", - }, - "zh": { - "label": "新建适配器", - "info": "在现有的适配器上创建一个随机初始化后的新适配器。", - }, - }, "rlhf_tab": { "en": { "label": "RLHF configurations", @@ -1055,6 +1083,11 @@ ALERTS = { "ru": "Неверная схема JSON.", "zh": "Json 格式错误。", }, + "warn_no_cuda": { + "en": "CUDA environment was not detected.", + "ru": "Среда CUDA не обнаружена.", + "zh": "未检测到 CUDA 环境。", + }, "info_aborting": { "en": "Aborted, wait for terminating...", "ru": "Прервано, ожидание завершения...", diff --git a/src/llmtuner/webui/runner.py b/src/llmtuner/webui/runner.py index 644e6495..c6fd4ae6 100644 --- a/src/llmtuner/webui/runner.py +++ b/src/llmtuner/webui/runner.py @@ -8,6 +8,7 @@ import gradio as gr import transformers from gradio.components import Component # cannot use TYPE_CHECKING here from transformers.trainer import TRAINING_ARGS_NAME +from transformers.utils import is_torch_cuda_available from ..extras.callbacks import LogCallback from ..extras.constants import TRAINING_STAGES @@ -64,12 +65,15 @@ class Runner: if len(dataset) == 0: return ALERTS["err_no_dataset"][lang] - if self.demo_mode and (not from_preview): + if not from_preview and self.demo_mode: return ALERTS["err_demo"][lang] if not from_preview and get_device_count() > 1: return ALERTS["err_device_count"][lang] + if not from_preview and not is_torch_cuda_available(): + gr.Warning(ALERTS["warn_no_cuda"][lang]) + self.aborted = False self.logger_handler.reset() self.trainer_callback = LogCallback(self) @@ -139,11 +143,13 @@ class Runner: args["num_layer_trainable"] = int(get("train.num_layer_trainable")) args["name_module_trainable"] = get("train.name_module_trainable") elif args["finetuning_type"] == "lora": - args["lora_rank"] = get("train.lora_rank") - args["lora_dropout"] = get("train.lora_dropout") + args["lora_rank"] = int(get("train.lora_rank")) + 
args["lora_alpha"] = float(get("train.lora_alpha")) + args["lora_dropout"] = float(get("train.lora_dropout")) args["lora_target"] = get("train.lora_target") or get_module(get("top.model_name")) - args["additional_target"] = get("train.additional_target") or None args["use_rslora"] = get("train.use_rslora") + args["use_dora"] = get("train.use_dora") + args["additional_target"] = get("train.additional_target") or None if args["stage"] in ["rm", "ppo", "dpo"]: args["create_new_adapter"] = args["quantization_bit"] is None else: diff --git a/src/llmtuner/webui/utils.py b/src/llmtuner/webui/utils.py index 1d63f23c..55a4865b 100644 --- a/src/llmtuner/webui/utils.py +++ b/src/llmtuner/webui/utils.py @@ -44,11 +44,14 @@ def can_quantize(finetuning_type: str) -> Dict[str, Any]: def check_json_schema(text: str, lang: str) -> None: try: tools = json.loads(text) - for tool in tools: - assert "name" in tool - except AssertionError: + if tools: + assert isinstance(tools, list) + for tool in tools: + if "name" not in tool: + raise ValueError("Name not found.") + except ValueError: gr.Warning(ALERTS["err_tool_name"][lang]) - except json.JSONDecodeError: + except Exception: gr.Warning(ALERTS["err_json_schema"][lang])