From 10be2f0eccc3963a985afcd24e5b8b8fc638b1c3 Mon Sep 17 00:00:00 2001
From: hiyouga
Date: Sat, 9 Mar 2024 00:09:09 +0800
Subject: [PATCH] fix aqlm version

---
 README.md                                        |  4 ++--
 README_zh.md                                     |  4 ++--
 examples/extras/galore/galore_adamw_8bit_bf16.sh |  2 +-
 setup.py                                         |  2 +-
 src/llmtuner/model/loader.py                     | 13 +------------
 src/llmtuner/model/patcher.py                    |  1 +
 6 files changed, 8 insertions(+), 18 deletions(-)

diff --git a/README.md b/README.md
index 4232da90..51c4c5e8 100644
--- a/README.md
+++ b/README.md
@@ -254,7 +254,7 @@ huggingface-cli login
 | Mandatory    | Minimum | Recommend |
 | ------------ | ------- | --------- |
 | python       | 3.8     | 3.10      |
-| torch        | 1.13.1  | 2.2.1     |
+| torch        | 1.13.1  | 2.2.0     |
 | transformers | 4.37.2  | 4.38.2    |
 | datasets     | 2.14.3  | 2.17.1    |
 | accelerate   | 0.27.2  | 0.27.2    |
@@ -264,7 +264,7 @@ huggingface-cli login
 | Optional     | Minimum | Recommend |
 | ------------ | ------- | --------- |
 | CUDA         | 11.6    | 12.2      |
-| deepspeed    | 0.10.0  | 0.13.4    |
+| deepspeed    | 0.10.0  | 0.13.1    |
 | bitsandbytes | 0.39.0  | 0.41.3    |
 | flash-attn   | 2.3.0   | 2.5.5     |
 
diff --git a/README_zh.md b/README_zh.md
index 66d86d30..a4971fa7 100644
--- a/README_zh.md
+++ b/README_zh.md
@@ -254,7 +254,7 @@ huggingface-cli login
 | 必需项       | 至少    | 推荐      |
 | ------------ | ------- | --------- |
 | python       | 3.8     | 3.10      |
-| torch        | 1.13.1  | 2.2.1     |
+| torch        | 1.13.1  | 2.2.0     |
 | transformers | 4.37.2  | 4.38.2    |
 | datasets     | 2.14.3  | 2.17.1    |
 | accelerate   | 0.27.2  | 0.27.2    |
@@ -264,7 +264,7 @@ huggingface-cli login
 | 可选项       | 至少    | 推荐      |
 | ------------ | ------- | --------- |
 | CUDA         | 11.6    | 12.2      |
-| deepspeed    | 0.10.0  | 0.13.4    |
+| deepspeed    | 0.10.0  | 0.13.1    |
 | bitsandbytes | 0.39.0  | 0.41.3    |
 | flash-attn   | 2.3.0   | 2.5.5     |
 
diff --git a/examples/extras/galore/galore_adamw_8bit_bf16.sh b/examples/extras/galore/galore_adamw_8bit_bf16.sh
index ddddcb33..881ab2eb 100644
--- a/examples/extras/galore/galore_adamw_8bit_bf16.sh
+++ b/examples/extras/galore/galore_adamw_8bit_bf16.sh
@@ -8,10 +8,10 @@ CUDA_VISIBLE_DEVICES=0 python ../../../src/train_bash.py \
     --dataset_dir ../../../data \
     --template default \
     --finetuning_type full \
+    --optim adamw_8bit \
     --use_galore \
     --galore_target mlp,self_attn \
     --galore_rank 16 \
-    --optim adamw_8bit \
     --output_dir ../../../saves/LLaMA2-7B/galore/sft \
     --overwrite_cache \
     --overwrite_output_dir \
diff --git a/setup.py b/setup.py
index ca8db70a..8f6b74a9 100644
--- a/setup.py
+++ b/setup.py
@@ -26,7 +26,7 @@ extra_require = {
     "bitsandbytes": ["bitsandbytes>=0.39.0"],
     "gptq": ["optimum>=1.16.0", "auto-gptq>=0.5.0"],
     "awq": ["autoawq"],
-    "aqlm": ["aqlm[gpu,cpu]"],
+    "aqlm": ["aqlm[gpu]>=1.1.0", "transformers @ git+https://github.com/huggingface/transformers.git"],
     "galore": ["galore_torch @ git+https://github.com/hiyouga/GaLore.git"],
     "qwen": ["tiktoken", "transformers_stream_generator"],
 }
diff --git a/src/llmtuner/model/loader.py b/src/llmtuner/model/loader.py
index e5b3bdd1..0760e792 100644
--- a/src/llmtuner/model/loader.py
+++ b/src/llmtuner/model/loader.py
@@ -1,4 +1,3 @@
-from contextlib import nullcontext
 from typing import TYPE_CHECKING, Any, Dict, Optional, Tuple
 
 from transformers import AutoConfig, AutoModelForCausalLM, AutoTokenizer
@@ -87,17 +86,7 @@ def load_model(
             logger.warning("Unsloth does not support loading adapters.")
 
     if model is None:
-        model_init_context = nullcontext()
-        if model_args.aqlm_optimization and getattr(config, "quantization_config", None):
-            quantization_config: Dict[str, Any] = getattr(config, "quantization_config", None)
-            if quantization_config.get("quant_method", None) == "aqlm":
-                import aqlm  # type: ignore
-
-                model_init_context = aqlm.optimize_for_training()
-                logger.info("Optimize for AQLM training.")  # https://github.com/Vahe1994/AQLM/issues/38
-
-        with model_init_context:
-            model = AutoModelForCausalLM.from_pretrained(model_args.model_name_or_path, config=config, **init_kwargs)
+        model = AutoModelForCausalLM.from_pretrained(model_args.model_name_or_path, config=config, **init_kwargs)
 
     patch_model(model, tokenizer, model_args, is_trainable)
     register_autoclass(config, model, tokenizer)
diff --git a/src/llmtuner/model/patcher.py b/src/llmtuner/model/patcher.py
index 4ecfcc86..443a4491 100644
--- a/src/llmtuner/model/patcher.py
+++ b/src/llmtuner/model/patcher.py
@@ -174,6 +174,7 @@ def _configure_quantization(
             require_version(
                 "transformers>=4.39.0.dev0", "To fix: pip install git+https://github.com/huggingface/transformers.git"
             )
+            require_version("aqlm>=1.1.0", "To fix: pip install aqlm[gpu]>=1.1.0")
             quantization_config["bits"] = 2
 
         quant_bits = quantization_config.get("bits", "?")
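
Note on the loader.py hunk: with aqlm>=1.1.0 and the transformers build pinned in setup.py, AQLM support is handled inside transformers itself, so the aqlm.optimize_for_training() workaround (https://github.com/Vahe1994/AQLM/issues/38) is no longer needed at load time. A minimal sketch of loading an AQLM checkpoint under those version assumptions; the model id is illustrative only, not part of this patch:

    # Assumes aqlm[gpu]>=1.1.0 and transformers installed from git, per setup.py above.
    import torch
    from transformers import AutoModelForCausalLM, AutoTokenizer

    model_id = "ISTA-DASLab/Llama-2-7b-AQLM-2Bit-1x16-hf"  # illustrative AQLM checkpoint
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    # transformers detects quant_method == "aqlm" in the checkpoint config and uses the
    # kernels provided by the aqlm package; no extra context manager is required.
    model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.float16, device_map="auto")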