fix aqlm version

parent 8a45213440
commit 10be2f0ecc

This commit pins the aqlm extra to aqlm[gpu]>=1.1.0 (plus a transformers dev snapshot from git), removes the manual aqlm.optimize_for_training() workaround from the model loader, enforces the new minimum via a require_version check, regroups a flag in the GaLore example script, and adjusts the requirement tables.
@@ -254,7 +254,7 @@ huggingface-cli login
 | Mandatory    | Minimum | Recommend |
 | ------------ | ------- | --------- |
 | python       | 3.8     | 3.10      |
-| torch        | 1.13.1  | 2.2.1     |
+| torch        | 1.13.1  | 2.2.0     |
 | transformers | 4.37.2  | 4.38.2    |
 | datasets     | 2.14.3  | 2.17.1    |
 | accelerate   | 0.27.2  | 0.27.2    |
@@ -264,7 +264,7 @@ huggingface-cli login
 | Optional     | Minimum | Recommend |
 | ------------ | ------- | --------- |
 | CUDA         | 11.6    | 12.2      |
-| deepspeed    | 0.10.0  | 0.13.4    |
+| deepspeed    | 0.10.0  | 0.13.1    |
 | bitsandbytes | 0.39.0  | 0.41.3    |
 | flash-attn   | 2.3.0   | 2.5.5     |
The same change in the Chinese requirement table (必需项 = Mandatory, 至少 = Minimum, 推荐 = Recommend):

@@ -254,7 +254,7 @@ huggingface-cli login
 | 必需项       | 至少    | 推荐      |
 | ------------ | ------- | --------- |
 | python       | 3.8     | 3.10      |
-| torch        | 1.13.1  | 2.2.1     |
+| torch        | 1.13.1  | 2.2.0     |
 | transformers | 4.37.2  | 4.38.2    |
 | datasets     | 2.14.3  | 2.17.1    |
 | accelerate   | 0.27.2  | 0.27.2    |
And in the Chinese optional table (可选项 = Optional):

@@ -264,7 +264,7 @@ huggingface-cli login
 | 可选项       | 至少    | 推荐      |
 | ------------ | ------- | --------- |
 | CUDA         | 11.6    | 12.2      |
-| deepspeed    | 0.10.0  | 0.13.4    |
+| deepspeed    | 0.10.0  | 0.13.1    |
 | bitsandbytes | 0.39.0  | 0.41.3    |
 | flash-attn   | 2.3.0   | 2.5.5     |
@@ -8,10 +8,10 @@ CUDA_VISIBLE_DEVICES=0 python ../../../src/train_bash.py \
     --dataset_dir ../../../data \
     --template default \
     --finetuning_type full \
-    --optim adamw_8bit \
     --use_galore \
     --galore_target mlp,self_attn \
     --galore_rank 16 \
+    --optim adamw_8bit \
     --output_dir ../../../saves/LLaMA2-7B/galore/sft \
     --overwrite_cache \
     --overwrite_output_dir \
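The hunk above only regroups --optim adamw_8bit next to the other GaLore-related flags; since HfArgumentParser-style CLIs behave like argparse, the order of optional flags does not affect parsing. A minimal sketch (toy parser with illustrative flag handling, not tied to train_bash.py) showing the move is cosmetic:

import argparse

# Two of the flags from the script above, in a toy parser.
parser = argparse.ArgumentParser()
parser.add_argument("--optim")
parser.add_argument("--galore_rank", type=int)

a = parser.parse_args(["--optim", "adamw_8bit", "--galore_rank", "16"])
b = parser.parse_args(["--galore_rank", "16", "--optim", "adamw_8bit"])
assert a == b  # optional-flag order is irrelevant to the parsed namespace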
setup.py
@@ -26,7 +26,7 @@ extra_require = {
     "bitsandbytes": ["bitsandbytes>=0.39.0"],
     "gptq": ["optimum>=1.16.0", "auto-gptq>=0.5.0"],
     "awq": ["autoawq"],
-    "aqlm": ["aqlm[gpu,cpu]"],
+    "aqlm": ["aqlm[gpu]>=1.1.0", "transformers @ git+https://github.com/huggingface/transformers.git"],
     "galore": ["galore_torch @ git+https://github.com/hiyouga/GaLore.git"],
     "qwen": ["tiktoken", "transformers_stream_generator"],
 }
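With the updated extra, pip install .[aqlm] now pulls in the pinned aqlm build and a transformers dev snapshot. As a minimal sketch (not part of the commit, standard library only; a real resolver would use packaging.version rather than naive tuple comparison), the pin could be checked at runtime like this:

from importlib.metadata import PackageNotFoundError, version

def aqlm_pin_satisfied(minimum: str = "1.1.0") -> bool:
    """Return True if an installed aqlm meets the new >=1.1.0 pin."""
    try:
        installed = version("aqlm")
    except PackageNotFoundError:
        return False

    def as_tuple(ver: str):
        # naive: assumes plain X.Y.Z release strings
        return tuple(int(part) for part in ver.split(".")[:3])

    return as_tuple(installed) >= as_tuple(minimum)

print(aqlm_pin_satisfied())  # False unless aqlm>=1.1.0 is installed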
@@ -1,4 +1,3 @@
-from contextlib import nullcontext
 from typing import TYPE_CHECKING, Any, Dict, Optional, Tuple
 
 from transformers import AutoConfig, AutoModelForCausalLM, AutoTokenizer
@@ -87,16 +86,6 @@ def load_model(
         logger.warning("Unsloth does not support loading adapters.")
 
     if model is None:
-        model_init_context = nullcontext()
-        if model_args.aqlm_optimization and getattr(config, "quantization_config", None):
-            quantization_config: Dict[str, Any] = getattr(config, "quantization_config", None)
-            if quantization_config.get("quant_method", None) == "aqlm":
-                import aqlm  # type: ignore
-
-                model_init_context = aqlm.optimize_for_training()
-                logger.info("Optimize for AQLM training.")  # https://github.com/Vahe1994/AQLM/issues/38
-
-        with model_init_context:
-            model = AutoModelForCausalLM.from_pretrained(model_args.model_name_or_path, config=config, **init_kwargs)
+        model = AutoModelForCausalLM.from_pretrained(model_args.model_name_or_path, config=config, **init_kwargs)
 
     patch_model(model, tokenizer, model_args, is_trainable)
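The deleted block was a workaround for pre-1.1.0 aqlm, which required wrapping model construction in aqlm.optimize_for_training() (see the linked issue); with aqlm>=1.1.0 and transformers>=4.39.0.dev0 the loader can call from_pretrained directly. A minimal sketch of the conditional-context pattern the removed code used (illustrative names; runs without aqlm installed when the flag is False):

from contextlib import nullcontext

def load_with_optional_hook(use_aqlm_hook: bool) -> None:
    ctx = nullcontext()  # no-op default, as in the removed code
    if use_aqlm_hook:
        import aqlm  # pre-1.1.0-era hook, per the removed lines above
        ctx = aqlm.optimize_for_training()
    with ctx:
        # the real loader called AutoModelForCausalLM.from_pretrained(...) here
        print("model instantiated inside the selected context")

load_with_optional_hook(False)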
@@ -174,6 +174,7 @@ def _configure_quantization(
         require_version(
             "transformers>=4.39.0.dev0", "To fix: pip install git+https://github.com/huggingface/transformers.git"
         )
+        require_version("aqlm>=1.1.0", "To fix: pip install aqlm[gpu]>=1.1.0")
         quantization_config["bits"] = 2
 
         quant_bits = quantization_config.get("bits", "?")
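require_version comes from transformers.utils.versions: it returns silently when the installed package satisfies the specifier and raises an ImportError carrying the supplied hint otherwise. A small usage sketch of the check this hunk adds:

from transformers.utils.versions import require_version

try:
    require_version("aqlm>=1.1.0", "To fix: pip install aqlm[gpu]>=1.1.0")
    print("aqlm pin satisfied")
except ImportError as err:  # also covers a missing package
    print(f"dependency check failed: {err}")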