diff --git a/Makefile b/Makefile
index 26287cee..5c754167 100644
--- a/Makefile
+++ b/Makefile
@@ -1,11 +1,11 @@
 .PHONY: quality style
 
-check_dirs := src tests
+check_dirs := scripts src
 
 quality:
-	ruff $(check_dirs)
+	ruff check $(check_dirs)
 	ruff format --check $(check_dirs)
 
 style:
-	ruff $(check_dirs) --fix
+	ruff check $(check_dirs) --fix
 	ruff format $(check_dirs)
diff --git a/README.md b/README.md
index e3842435..b6e49c86 100644
--- a/README.md
+++ b/README.md
@@ -502,10 +502,13 @@
 use_cpu: false
 ```
 
+> [!TIP]
+> We recommend using Accelerate for LoRA tuning.
+
 #### Use DeepSpeed
 
 ```bash
-deepspeed --num_gpus 8 --master_port=9901 src/train_bash.py \
+deepspeed --num_gpus 8 src/train_bash.py \
     --deepspeed ds_config.json \
     ... # arguments (same as above)
 ```
@@ -522,25 +525,32 @@ deepspeed --num_gpus 8 --master_port=9901 src/train_bash.py \
   "fp16": {
     "enabled": "auto",
     "loss_scale": 0,
-    "initial_scale_power": 16,
     "loss_scale_window": 1000,
+    "initial_scale_power": 16,
     "hysteresis": 2,
     "min_loss_scale": 1
   },
+  "bf16": {
+    "enabled": "auto"
+  },
   "zero_optimization": {
     "stage": 2,
     "allgather_partitions": true,
     "allgather_bucket_size": 5e8,
+    "overlap_comm": true,
     "reduce_scatter": true,
     "reduce_bucket_size": 5e8,
-    "overlap_comm": false,
-    "contiguous_gradients": true
+    "contiguous_gradients": true,
+    "round_robin_gradients": true
   }
 }
 ```
 
+> [!TIP]
+> Refer to [examples](examples) for more training scripts.
+
 ### Merge LoRA weights and export model
 
 ```bash
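Aside: the `"auto"` entries in the ZeRO-2 config above are resolved from the training arguments by the Hugging Face Trainer's DeepSpeed integration at launch time. As a convenience, here is a minimal Python sketch (not part of the patch) that writes the fragment shown in this hunk to `ds_config.json`; any top-level keys the hunk does not show still need to be copied from the full example in the README.

```python
import json

# Sketch only: this covers just the fp16/bf16/zero_optimization section visible
# in the hunk above; remaining top-level keys come from the full README example.
ds_config = {
    "fp16": {
        "enabled": "auto",  # filled in from the --fp16 training argument
        "loss_scale": 0,
        "loss_scale_window": 1000,
        "initial_scale_power": 16,
        "hysteresis": 2,
        "min_loss_scale": 1,
    },
    "bf16": {"enabled": "auto"},  # filled in from the --bf16 training argument
    "zero_optimization": {
        "stage": 2,
        "allgather_partitions": True,
        "allgather_bucket_size": 5e8,
        "overlap_comm": True,
        "reduce_scatter": True,
        "reduce_bucket_size": 5e8,
        "contiguous_gradients": True,
        "round_robin_gradients": True,
    },
}

with open("ds_config.json", "w") as f:
    json.dump(ds_config, f, indent=2)
```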
diff --git a/README_zh.md b/README_zh.md
index 498b2d5b..eaea39eb 100644
--- a/README_zh.md
+++ b/README_zh.md
@@ -501,10 +501,13 @@
 use_cpu: false
 ```
 
+> [!TIP]
+> 我们推荐使用 Accelerate 进行 LoRA 训练。
+
 #### 使用 DeepSpeed
 
 ```bash
-deepspeed --num_gpus 8 --master_port=9901 src/train_bash.py \
+deepspeed --num_gpus 8 src/train_bash.py \
     --deepspeed ds_config.json \
     ... # 参数同上
 ```
@@ -521,25 +524,32 @@ deepspeed --num_gpus 8 --master_port=9901 src/train_bash.py \
   "fp16": {
     "enabled": "auto",
     "loss_scale": 0,
-    "initial_scale_power": 16,
     "loss_scale_window": 1000,
+    "initial_scale_power": 16,
     "hysteresis": 2,
     "min_loss_scale": 1
   },
+  "bf16": {
+    "enabled": "auto"
+  },
   "zero_optimization": {
     "stage": 2,
     "allgather_partitions": true,
     "allgather_bucket_size": 5e8,
+    "overlap_comm": true,
     "reduce_scatter": true,
     "reduce_bucket_size": 5e8,
-    "overlap_comm": false,
-    "contiguous_gradients": true
+    "contiguous_gradients": true,
+    "round_robin_gradients": true
   }
 }
 ```
 
+> [!TIP]
+> 更多训练脚本请查看 [examples](examples)。
+
 ### 合并 LoRA 权重并导出模型
 
 ```bash
diff --git a/pyproject.toml b/pyproject.toml
index 555515ea..0316276b 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -28,5 +28,6 @@ known-third-party = [
 [tool.ruff.format]
 quote-style = "double"
 indent-style = "space"
+docstring-code-format = true
 skip-magic-trailing-comma = false
 line-ending = "auto"
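Aside: `docstring-code-format = true` asks `ruff format` to also format code examples embedded in docstrings (doctests, fenced blocks). A hypothetical illustration, not taken from the repository:

```python
def scale_lr(base_lr: float, world_size: int) -> float:
    """Linearly scale a learning rate by the number of workers.

    With `docstring-code-format = true`, `ruff format` rewrites the doctest
    source line below to its usual code style (here: dropping the extra spaces):

    >>> scale_lr( 1e-4,  8 )
    0.0008
    """
    return base_lr * world_size
```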
diff --git a/tests/cal_flops.py b/scripts/cal_flops.py
similarity index 100%
rename from tests/cal_flops.py
rename to scripts/cal_flops.py
diff --git a/tests/cal_lr.py b/scripts/cal_lr.py
similarity index 100%
rename from tests/cal_lr.py
rename to scripts/cal_lr.py
diff --git a/tests/length_cdf.py b/scripts/length_cdf.py
similarity index 100%
rename from tests/length_cdf.py
rename to scripts/length_cdf.py
diff --git a/tests/llama_pro.py b/scripts/llama_pro.py
similarity index 100%
rename from tests/llama_pro.py
rename to scripts/llama_pro.py
diff --git a/tests/llamafy_baichuan2.py b/scripts/llamafy_baichuan2.py
similarity index 100%
rename from tests/llamafy_baichuan2.py
rename to scripts/llamafy_baichuan2.py
diff --git a/tests/llamafy_internlm2.py b/scripts/llamafy_internlm2.py
similarity index 100%
rename from tests/llamafy_internlm2.py
rename to scripts/llamafy_internlm2.py
diff --git a/tests/llamafy_qwen.py b/scripts/llamafy_qwen.py
similarity index 100%
rename from tests/llamafy_qwen.py
rename to scripts/llamafy_qwen.py
diff --git a/tests/loftq_init.py b/scripts/loftq_init.py
similarity index 100%
rename from tests/loftq_init.py
rename to scripts/loftq_init.py
diff --git a/tests/test_toolcall.py b/scripts/test_toolcall.py
similarity index 100%
rename from tests/test_toolcall.py
rename to scripts/test_toolcall.py
diff --git a/src/llmtuner/data/formatter.py b/src/llmtuner/data/formatter.py
index 82dff8ca..a9632931 100644
--- a/src/llmtuner/data/formatter.py
+++ b/src/llmtuner/data/formatter.py
@@ -75,8 +75,7 @@ class Formatter(ABC):
     tool_format: Literal["default"] = "default"
 
     @abstractmethod
-    def apply(self, **kwargs) -> SLOTS:
-        ...
+    def apply(self, **kwargs) -> SLOTS: ...
 
     def extract(self, content: str) -> Union[str, Tuple[str, str]]:
         raise NotImplementedError
diff --git a/src/llmtuner/extras/misc.py b/src/llmtuner/extras/misc.py
index 348f6c6c..9d93487f 100644
--- a/src/llmtuner/extras/misc.py
+++ b/src/llmtuner/extras/misc.py
@@ -14,6 +14,7 @@ from transformers.utils import (
     is_torch_npu_available,
     is_torch_xpu_available,
 )
+from transformers.utils.versions import require_version
 
 from .constants import V_HEAD_SAFE_WEIGHTS_NAME, V_HEAD_WEIGHTS_NAME
 from .logging import get_logger
@@ -56,6 +57,17 @@ class AverageMeter:
         self.avg = self.sum / self.count
 
 
+def check_dependencies() -> None:
+    if int(os.environ.get("DISABLE_VERSION_CHECK", "0")):
+        logger.warning("Version checking has been disabled, may lead to unexpected behaviors.")
+    else:
+        require_version("transformers>=4.37.2", "To fix: pip install transformers>=4.37.2")
+        require_version("datasets>=2.14.3", "To fix: pip install datasets>=2.14.3")
+        require_version("accelerate>=0.27.2", "To fix: pip install accelerate>=0.27.2")
+        require_version("peft>=0.9.0", "To fix: pip install peft>=0.9.0")
+        require_version("trl>=0.7.11", "To fix: pip install trl>=0.7.11")
+
+
 def count_parameters(model: torch.nn.Module) -> Tuple[int, int]:
     r"""
     Returns the number of trainable parameters and number of all parameters in the model.
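Aside: the function above is the former `_check_dependencies` helper moved out of `hparams/parser.py` (removed below), now gated by the `DISABLE_VERSION_CHECK` environment variable instead of the dropped `disable_version_checking` flag. A small usage sketch, assuming `llmtuner` is importable in the current environment:

```python
import os

from llmtuner.extras.misc import check_dependencies

# Default: with DISABLE_VERSION_CHECK unset (or "0"), every require_version()
# call runs and raises if an installed package is older than the pinned minimum,
# e.g. transformers < 4.37.2.
check_dependencies()

# Any non-zero integer string skips the checks and only logs a warning.
os.environ["DISABLE_VERSION_CHECK"] = "1"
check_dependencies()
```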
DoRA.") _verify_model_args(model_args, finetuning_args) - _check_dependencies(disabled=finetuning_args.disable_version_checking) if ( training_args.do_train @@ -249,7 +236,6 @@ def get_infer_args(args: Optional[Dict[str, Any]] = None) -> _INFER_CLS: _set_transformers_logging() _verify_model_args(model_args, finetuning_args) - _check_dependencies(disabled=finetuning_args.disable_version_checking) if data_args.template is None: raise ValueError("Please specify which `template` to use.") @@ -262,7 +248,6 @@ def get_eval_args(args: Optional[Dict[str, Any]] = None) -> _EVAL_CLS: _set_transformers_logging() _verify_model_args(model_args, finetuning_args) - _check_dependencies(disabled=finetuning_args.disable_version_checking) model_args.aqlm_optimization = True if data_args.template is None: diff --git a/src/llmtuner/model/loader.py b/src/llmtuner/model/loader.py index e5b3bdd1..588b5012 100644 --- a/src/llmtuner/model/loader.py +++ b/src/llmtuner/model/loader.py @@ -5,7 +5,7 @@ from transformers import AutoConfig, AutoModelForCausalLM, AutoTokenizer from trl import AutoModelForCausalLMWithValueHead from ..extras.logging import get_logger -from ..extras.misc import count_parameters, get_current_device, try_download_model_from_ms +from ..extras.misc import check_dependencies, count_parameters, get_current_device, try_download_model_from_ms from .adapter import init_adapter from .patcher import patch_config, patch_model, patch_tokenizer, patch_valuehead_model from .utils import load_valuehead_params, register_autoclass @@ -20,6 +20,9 @@ if TYPE_CHECKING: logger = get_logger(__name__) +check_dependencies() + + def _get_init_kwargs(model_args: "ModelArguments") -> Dict[str, Any]: return { "trust_remote_code": True,