From 10be2f0eccc3963a985afcd24e5b8b8fc638b1c3 Mon Sep 17 00:00:00 2001
From: hiyouga
Date: Sat, 9 Mar 2024 00:09:09 +0800
Subject: [PATCH] fix aqlm version

---
 README.md                                        |  4 ++--
 README_zh.md                                     |  4 ++--
 examples/extras/galore/galore_adamw_8bit_bf16.sh |  2 +-
 setup.py                                         |  2 +-
 src/llmtuner/model/loader.py                     | 13 +------------
 src/llmtuner/model/patcher.py                    |  1 +
 6 files changed, 8 insertions(+), 18 deletions(-)

diff --git a/README.md b/README.md
index 4232da90..51c4c5e8 100644
--- a/README.md
+++ b/README.md
@@ -254,7 +254,7 @@ huggingface-cli login
 | Mandatory    | Minimum | Recommend |
 | ------------ | ------- | --------- |
 | python       | 3.8     | 3.10      |
-| torch        | 1.13.1  | 2.2.1     |
+| torch        | 1.13.1  | 2.2.0     |
 | transformers | 4.37.2  | 4.38.2    |
 | datasets     | 2.14.3  | 2.17.1    |
 | accelerate   | 0.27.2  | 0.27.2    |
@@ -264,7 +264,7 @@ huggingface-cli login
 | Optional     | Minimum | Recommend |
 | ------------ | ------- | --------- |
 | CUDA         | 11.6    | 12.2      |
-| deepspeed    | 0.10.0  | 0.13.4    |
+| deepspeed    | 0.10.0  | 0.13.1    |
 | bitsandbytes | 0.39.0  | 0.41.3    |
 | flash-attn   | 2.3.0   | 2.5.5     |
 
diff --git a/README_zh.md b/README_zh.md
index 66d86d30..a4971fa7 100644
--- a/README_zh.md
+++ b/README_zh.md
@@ -254,7 +254,7 @@ huggingface-cli login
 | 必需项       | 至少    | 推荐      |
 | ------------ | ------- | --------- |
 | python       | 3.8     | 3.10      |
-| torch        | 1.13.1  | 2.2.1     |
+| torch        | 1.13.1  | 2.2.0     |
 | transformers | 4.37.2  | 4.38.2    |
 | datasets     | 2.14.3  | 2.17.1    |
 | accelerate   | 0.27.2  | 0.27.2    |
@@ -264,7 +264,7 @@ huggingface-cli login
 | 可选项       | 至少    | 推荐      |
 | ------------ | ------- | --------- |
 | CUDA         | 11.6    | 12.2      |
-| deepspeed    | 0.10.0  | 0.13.4    |
+| deepspeed    | 0.10.0  | 0.13.1    |
 | bitsandbytes | 0.39.0  | 0.41.3    |
 | flash-attn   | 2.3.0   | 2.5.5     |
 
diff --git a/examples/extras/galore/galore_adamw_8bit_bf16.sh b/examples/extras/galore/galore_adamw_8bit_bf16.sh
index ddddcb33..881ab2eb 100644
--- a/examples/extras/galore/galore_adamw_8bit_bf16.sh
+++ b/examples/extras/galore/galore_adamw_8bit_bf16.sh
@@ -8,10 +8,10 @@ CUDA_VISIBLE_DEVICES=0 python ../../../src/train_bash.py \
     --dataset_dir ../../../data \
     --template default \
     --finetuning_type full \
+    --optim adamw_8bit \
     --use_galore \
     --galore_target mlp,self_attn \
     --galore_rank 16 \
-    --optim adamw_8bit \
     --output_dir ../../../saves/LLaMA2-7B/galore/sft \
     --overwrite_cache \
     --overwrite_output_dir \
diff --git a/setup.py b/setup.py
index ca8db70a..8f6b74a9 100644
--- a/setup.py
+++ b/setup.py
@@ -26,7 +26,7 @@ extra_require = {
     "bitsandbytes": ["bitsandbytes>=0.39.0"],
     "gptq": ["optimum>=1.16.0", "auto-gptq>=0.5.0"],
     "awq": ["autoawq"],
-    "aqlm": ["aqlm[gpu,cpu]"],
+    "aqlm": ["aqlm[gpu]>=1.1.0", "transformers @ git+https://github.com/huggingface/transformers.git"],
     "galore": ["galore_torch @ git+https://github.com/hiyouga/GaLore.git"],
     "qwen": ["tiktoken", "transformers_stream_generator"],
 }
diff --git a/src/llmtuner/model/loader.py b/src/llmtuner/model/loader.py
index e5b3bdd1..0760e792 100644
--- a/src/llmtuner/model/loader.py
+++ b/src/llmtuner/model/loader.py
@@ -1,4 +1,3 @@
-from contextlib import nullcontext
 from typing import TYPE_CHECKING, Any, Dict, Optional, Tuple
 
 from transformers import AutoConfig, AutoModelForCausalLM, AutoTokenizer
@@ -87,17 +86,7 @@ def load_model(
             logger.warning("Unsloth does not support loading adapters.")
 
     if model is None:
-        model_init_context = nullcontext()
-        if model_args.aqlm_optimization and getattr(config, "quantization_config", None):
-            quantization_config: Dict[str, Any] = getattr(config, "quantization_config", None)
-            if quantization_config.get("quant_method", None) == "aqlm":
-                import aqlm  # type: ignore
-
-                model_init_context = aqlm.optimize_for_training()
-                logger.info("Optimize for AQLM training.")  # https://github.com/Vahe1994/AQLM/issues/38
-
-        with model_init_context:
-            model = AutoModelForCausalLM.from_pretrained(model_args.model_name_or_path, config=config, **init_kwargs)
+        model = AutoModelForCausalLM.from_pretrained(model_args.model_name_or_path, config=config, **init_kwargs)
 
     patch_model(model, tokenizer, model_args, is_trainable)
     register_autoclass(config, model, tokenizer)
diff --git a/src/llmtuner/model/patcher.py b/src/llmtuner/model/patcher.py
index 4ecfcc86..443a4491 100644
--- a/src/llmtuner/model/patcher.py
+++ b/src/llmtuner/model/patcher.py
@@ -174,6 +174,7 @@ def _configure_quantization(
             require_version(
                 "transformers>=4.39.0.dev0", "To fix: pip install git+https://github.com/huggingface/transformers.git"
             )
+            require_version("aqlm>=1.1.0", "To fix: pip install aqlm[gpu]>=1.1.0")
             quantization_config["bits"] = 2
 
         quant_bits = quantization_config.get("bits", "?")
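
Note on the loader.py hunk: with aqlm>=1.1.0 and the transformers build pinned in setup.py, AQLM support is handled inside transformers itself, so the aqlm.optimize_for_training() workaround (https://github.com/Vahe1994/AQLM/issues/38) is no longer needed at load time. A minimal sketch of loading an AQLM checkpoint under those version assumptions; the model id is illustrative only, not part of this patch:

    # Assumes aqlm[gpu]>=1.1.0 and transformers installed from git, per setup.py above.
    import torch
    from transformers import AutoModelForCausalLM, AutoTokenizer

    model_id = "ISTA-DASLab/Llama-2-7b-AQLM-2Bit-1x16-hf"  # illustrative AQLM checkpoint
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    # transformers detects quant_method == "aqlm" in the checkpoint config and uses the
    # kernels provided by the aqlm package; no extra context manager is required.
    model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.float16, device_map="auto")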