diff --git a/Makefile b/Makefile
index 65be047b..3f13b215 100644
--- a/Makefile
+++ b/Makefile
@@ -11,4 +11,4 @@ style:
 	ruff format $(check_dirs)
 
 test:
-	pytest tests/
+	CUDA_VISIBLE_DEVICES= pytest tests/
diff --git a/src/llamafactory/extras/misc.py b/src/llamafactory/extras/misc.py
index 3d969df1..93153b3e 100644
--- a/src/llamafactory/extras/misc.py
+++ b/src/llamafactory/extras/misc.py
@@ -22,6 +22,7 @@ from transformers import InfNanRemoveLogitsProcessor, LogitsProcessorList, PreTr
 from transformers.utils import (
     SAFE_WEIGHTS_NAME,
     WEIGHTS_NAME,
+    is_safetensors_available,
     is_torch_bf16_gpu_available,
     is_torch_cuda_available,
     is_torch_mps_available,
@@ -34,6 +35,11 @@ from .constants import V_HEAD_SAFE_WEIGHTS_NAME, V_HEAD_WEIGHTS_NAME
 from .logging import get_logger
 
 
+if is_safetensors_available():
+    from safetensors import safe_open
+    from safetensors.torch import save_file
+
+
 _is_fp16_available = is_torch_npu_available() or is_torch_cuda_available()
 try:
     _is_bf16_available = is_torch_bf16_gpu_available()
@@ -128,9 +134,6 @@ def fix_valuehead_checkpoint(
         return
 
     if safe_serialization:
-        from safetensors import safe_open
-        from safetensors.torch import save_file
-
         path_to_checkpoint = os.path.join(output_dir, SAFE_WEIGHTS_NAME)
         with safe_open(path_to_checkpoint, framework="pt", device="cpu") as f:
             state_dict: Dict[str, torch.Tensor] = {key: f.get_tensor(key) for key in f.keys()}
diff --git a/tests/data/test_supervised.py b/tests/data/test_supervised.py
index a72800d2..9f7b2dbf 100644
--- a/tests/data/test_supervised.py
+++ b/tests/data/test_supervised.py
@@ -41,7 +41,7 @@ TRAIN_ARGS = {
 }
 
 
-@pytest.mark.parametrize("num_samples", [10])
+@pytest.mark.parametrize("num_samples", [16])
 def test_supervised(num_samples: int):
     model_args, data_args, training_args, _, _ = get_train_args(TRAIN_ARGS)
     tokenizer_module = load_tokenizer(model_args)
diff --git a/tests/model/model_utils/test_checkpointing.py b/tests/model/model_utils/test_checkpointing.py
new file mode 100644
index 00000000..670e693d
--- /dev/null
+++ b/tests/model/model_utils/test_checkpointing.py
@@ -0,0 +1,74 @@
+# Copyright 2024 the LlamaFactory team.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+
+import torch
+
+from llamafactory.extras.misc import get_current_device
+from llamafactory.hparams import get_train_args
+from llamafactory.model import load_model, load_tokenizer
+
+
+TINY_LLAMA = os.environ.get("TINY_LLAMA", "llamafactory/tiny-random-Llama-3")
+
+TRAIN_ARGS = {
+    "model_name_or_path": TINY_LLAMA,
+    "stage": "sft",
+    "do_train": True,
+    "finetuning_type": "lora",
+    "lora_target": "all",
+    "dataset": "llamafactory/tiny-supervised-dataset",
+    "dataset_dir": "ONLINE",
+    "template": "llama3",
+    "cutoff_len": 1024,
+    "overwrite_cache": True,
+    "output_dir": "dummy_dir",
+    "overwrite_output_dir": True,
+    "fp16": True,
+}
+
+
+def test_checkpointing_enable():
+    model_args, _, _, finetuning_args, _ = get_train_args({"disable_gradient_checkpointing": False, **TRAIN_ARGS})
+    tokenizer_module = load_tokenizer(model_args)
+    model = load_model(tokenizer_module["tokenizer"], model_args, finetuning_args, is_trainable=True)
+    for module in filter(lambda m: hasattr(m, "gradient_checkpointing"), model.modules()):
+        assert getattr(module, "gradient_checkpointing") is True
+
+
+def test_checkpointing_disable():
+    model_args, _, _, finetuning_args, _ = get_train_args({"disable_gradient_checkpointing": True, **TRAIN_ARGS})
+    tokenizer_module = load_tokenizer(model_args)
+    model = load_model(tokenizer_module["tokenizer"], model_args, finetuning_args, is_trainable=True)
+    for module in filter(lambda m: hasattr(m, "gradient_checkpointing"), model.modules()):
+        assert getattr(module, "gradient_checkpointing") is False
+
+
+def test_upcast_layernorm():
+    model_args, _, _, finetuning_args, _ = get_train_args({"upcast_layernorm": True, **TRAIN_ARGS})
+    tokenizer_module = load_tokenizer(model_args)
+    model = load_model(tokenizer_module["tokenizer"], model_args, finetuning_args, is_trainable=True)
+    for name, param in model.named_parameters():
+        if param.ndim == 1 and "norm" in name:
+            assert param.dtype == torch.float32
+
+
+def test_upcast_lmhead_output():
+    model_args, _, _, finetuning_args, _ = get_train_args({"upcast_lmhead_output": True, **TRAIN_ARGS})
+    tokenizer_module = load_tokenizer(model_args)
+    model = load_model(tokenizer_module["tokenizer"], model_args, finetuning_args, is_trainable=True)
+    inputs = torch.randn((1, 16), dtype=torch.float16, device=get_current_device())
+    outputs: "torch.Tensor" = model.lm_head(inputs)
+    assert outputs.dtype == torch.float32
diff --git a/tests/model/test_base.py b/tests/model/test_base.py
index 462e8cfa..ee0b2886 100644
--- a/tests/model/test_base.py
+++ b/tests/model/test_base.py
@@ -13,16 +13,21 @@
 # limitations under the License.
 
 import os
+from typing import Dict
 
 import torch
 from transformers import AutoModelForCausalLM
+from trl import AutoModelForCausalLMWithValueHead
 
+from llamafactory.extras.misc import get_current_device
 from llamafactory.hparams import get_infer_args
 from llamafactory.model import load_model, load_tokenizer
 
 
 TINY_LLAMA = os.environ.get("TINY_LLAMA", "llamafactory/tiny-random-Llama-3")
 
+TINY_LLAMA_VALUEHEAD = os.environ.get("TINY_LLAMA_VALUEHEAD", "llamafactory/tiny-random-Llama-3-valuehead")
+
 INFER_ARGS = {
     "model_name_or_path": TINY_LLAMA,
     "template": "llama3",
@@ -38,9 +43,32 @@ def compare_model(model_a: "torch.nn.Module", model_b: "torch.nn.Module"):
         assert torch.allclose(state_dict_a[name], state_dict_b[name]) is True
 
 
+def post_init(self: "AutoModelForCausalLMWithValueHead", state_dict: Dict[str, "torch.Tensor"]):
+    state_dict = {k[7:]: state_dict[k] for k in state_dict.keys() if k.startswith("v_head.")}
+    self.v_head.load_state_dict(state_dict, strict=False)
+    del state_dict
+
+
 def test_base():
     model_args, _, finetuning_args, _ = get_infer_args(INFER_ARGS)
     tokenizer_module = load_tokenizer(model_args)
     model = load_model(tokenizer_module["tokenizer"], model_args, finetuning_args, is_trainable=False)
-    ref_model = AutoModelForCausalLM.from_pretrained(TINY_LLAMA, torch_dtype=model.dtype, device_map=model.device)
+
+    ref_model = AutoModelForCausalLM.from_pretrained(
+        TINY_LLAMA, torch_dtype=torch.float16, device_map=get_current_device()
+    )
+    compare_model(model, ref_model)
+
+
+def test_valuehead():
+    AutoModelForCausalLMWithValueHead.post_init = post_init  # patch for CPU test
+    model_args, _, finetuning_args, _ = get_infer_args(INFER_ARGS)
+    tokenizer_module = load_tokenizer(model_args)
+    model = load_model(
+        tokenizer_module["tokenizer"], model_args, finetuning_args, is_trainable=False, add_valuehead=True
+    )
+
+    ref_model = AutoModelForCausalLMWithValueHead.from_pretrained(
+        TINY_LLAMA_VALUEHEAD, torch_dtype=torch.float16, device_map=get_current_device()
+    )
     compare_model(model, ref_model)
diff --git a/tests/model/test_freeze.py b/tests/model/test_freeze.py
index ac5a023c..5f478af6 100644
--- a/tests/model/test_freeze.py
+++ b/tests/model/test_freeze.py
@@ -49,6 +49,7 @@ def test_freeze_train_all_modules():
     model_args, _, _, finetuning_args, _ = get_train_args({"freeze_trainable_layers": 1, **TRAIN_ARGS})
     tokenizer_module = load_tokenizer(model_args)
     model = load_model(tokenizer_module["tokenizer"], model_args, finetuning_args, is_trainable=True)
+
     for name, param in model.named_parameters():
         if name.startswith("model.layers.1."):
             assert param.requires_grad is True
@@ -64,6 +65,7 @@ def test_freeze_train_extra_modules():
     )
     tokenizer_module = load_tokenizer(model_args)
     model = load_model(tokenizer_module["tokenizer"], model_args, finetuning_args, is_trainable=True)
+
     for name, param in model.named_parameters():
         if name.startswith("model.layers.1.") or any(module in name for module in ["embed_tokens", "lm_head"]):
             assert param.requires_grad is True
@@ -77,6 +79,7 @@ def test_freeze_inference():
     model_args, _, finetuning_args, _ = get_infer_args(INFER_ARGS)
     tokenizer_module = load_tokenizer(model_args)
     model = load_model(tokenizer_module["tokenizer"], model_args, finetuning_args, is_trainable=False)
+
     for param in model.parameters():
         assert param.requires_grad is False
         assert param.dtype == torch.float16
diff --git a/tests/model/test_full.py b/tests/model/test_full.py
index bcd6480f..0a6e0743 100644
--- a/tests/model/test_full.py
+++ b/tests/model/test_full.py
@@ -49,6 +49,7 @@ def test_full_train():
     model_args, _, _, finetuning_args, _ = get_train_args(TRAIN_ARGS)
     tokenizer_module = load_tokenizer(model_args)
     model = load_model(tokenizer_module["tokenizer"], model_args, finetuning_args, is_trainable=True)
+
     for param in model.parameters():
         assert param.requires_grad is True
         assert param.dtype == torch.float32
@@ -58,6 +59,7 @@ def test_full_inference():
     model_args, _, finetuning_args, _ = get_infer_args(INFER_ARGS)
     tokenizer_module = load_tokenizer(model_args)
     model = load_model(tokenizer_module["tokenizer"], model_args, finetuning_args, is_trainable=False)
+
     for param in model.parameters():
         assert param.requires_grad is False
         assert param.dtype == torch.float16
diff --git a/tests/model/test_lora.py b/tests/model/test_lora.py
index e49c026c..4923c8ad 100644
--- a/tests/model/test_lora.py
+++ b/tests/model/test_lora.py
@@ -18,7 +18,9 @@ from typing import Sequence
 import torch
 from peft import LoraModel, PeftModel
 from transformers import AutoModelForCausalLM
+from trl import AutoModelForCausalLMWithValueHead
 
+from llamafactory.extras.misc import get_current_device
 from llamafactory.hparams import get_infer_args, get_train_args
 from llamafactory.model import load_model, load_tokenizer
 
@@ -27,6 +29,8 @@ TINY_LLAMA = os.environ.get("TINY_LLAMA", "llamafactory/tiny-random-Llama-3")
 
 TINY_LLAMA_ADAPTER = os.environ.get("TINY_LLAMA_ADAPTER", "llamafactory/tiny-random-Llama-3-lora")
 
+TINY_LLAMA_VALUEHEAD = os.environ.get("TINY_LLAMA_VALUEHEAD", "llamafactory/tiny-random-Llama-3-valuehead")
+
 TRAIN_ARGS = {
     "model_name_or_path": TINY_LLAMA,
     "stage": "sft",
@@ -67,10 +71,29 @@ def compare_model(model_a: "torch.nn.Module", model_b: "torch.nn.Module", diff_k
         assert torch.allclose(state_dict_a[name], state_dict_b[name]) is True
 
 
+def test_lora_train_qv_modules():
+    model_args, _, _, finetuning_args, _ = get_train_args({"lora_target": "q_proj,v_proj", **TRAIN_ARGS})
+    tokenizer_module = load_tokenizer(model_args)
+    model = load_model(tokenizer_module["tokenizer"], model_args, finetuning_args, is_trainable=True)
+
+    linear_modules = set()
+    for name, param in model.named_parameters():
+        if any(module in name for module in ["lora_A", "lora_B"]):
+            linear_modules.add(name.split(".lora_", maxsplit=1)[0].split(".")[-1])
+            assert param.requires_grad is True
+            assert param.dtype == torch.float32
+        else:
+            assert param.requires_grad is False
+            assert param.dtype == torch.float16
+
+    assert linear_modules == {"q_proj", "v_proj"}
+
+
 def test_lora_train_all_modules():
     model_args, _, _, finetuning_args, _ = get_train_args({"lora_target": "all", **TRAIN_ARGS})
     tokenizer_module = load_tokenizer(model_args)
     model = load_model(tokenizer_module["tokenizer"], model_args, finetuning_args, is_trainable=True)
+
     linear_modules = set()
     for name, param in model.named_parameters():
         if any(module in name for module in ["lora_A", "lora_B"]):
@@ -90,6 +113,7 @@ def test_lora_train_extra_modules():
     )
     tokenizer_module = load_tokenizer(model_args)
     model = load_model(tokenizer_module["tokenizer"], model_args, finetuning_args, is_trainable=True)
+
     extra_modules = set()
     for name, param in model.named_parameters():
         if any(module in name for module in ["lora_A", "lora_B"]):
@@ -113,7 +137,9 @@ def test_lora_train_old_adapters():
     tokenizer_module = load_tokenizer(model_args)
     model = load_model(tokenizer_module["tokenizer"], model_args, finetuning_args, is_trainable=True)
 
-    base_model = AutoModelForCausalLM.from_pretrained(TINY_LLAMA, torch_dtype=model.dtype, device_map=model.device)
+    base_model = AutoModelForCausalLM.from_pretrained(
+        TINY_LLAMA, torch_dtype=torch.float16, device_map=get_current_device()
+    )
     ref_model = PeftModel.from_pretrained(base_model, TINY_LLAMA_ADAPTER, is_trainable=True)
     for param in filter(lambda p: p.requires_grad, ref_model.parameters()):
         param.data = param.data.to(torch.float32)
@@ -128,7 +154,9 @@ def test_lora_train_new_adapters():
     tokenizer_module = load_tokenizer(model_args)
     model = load_model(tokenizer_module["tokenizer"], model_args, finetuning_args, is_trainable=True)
 
-    base_model = AutoModelForCausalLM.from_pretrained(TINY_LLAMA, torch_dtype=model.dtype, device_map=model.device)
+    base_model = AutoModelForCausalLM.from_pretrained(
+        TINY_LLAMA, torch_dtype=torch.float16, device_map=get_current_device()
+    )
     ref_model = PeftModel.from_pretrained(base_model, TINY_LLAMA_ADAPTER, is_trainable=True)
     for param in filter(lambda p: p.requires_grad, ref_model.parameters()):
         param.data = param.data.to(torch.float32)
@@ -138,17 +166,31 @@ def test_lora_train_new_adapters():
     )
 
 
+def test_lora_train_valuehead():
+    model_args, _, finetuning_args, _ = get_infer_args(INFER_ARGS)
+    tokenizer_module = load_tokenizer(model_args)
+    model = load_model(
+        tokenizer_module["tokenizer"], model_args, finetuning_args, is_trainable=True, add_valuehead=True
+    )
+
+    ref_model: "AutoModelForCausalLMWithValueHead" = AutoModelForCausalLMWithValueHead.from_pretrained(
+        TINY_LLAMA_VALUEHEAD, torch_dtype=torch.float16, device_map=get_current_device()
+    )
+    state_dict = model.state_dict()
+    ref_state_dict = ref_model.state_dict()
+
+    assert torch.allclose(state_dict["v_head.summary.weight"], ref_state_dict["v_head.summary.weight"])
+    assert torch.allclose(state_dict["v_head.summary.bias"], ref_state_dict["v_head.summary.bias"])
+
+
 def test_lora_inference():
     model_args, _, finetuning_args, _ = get_infer_args(INFER_ARGS)
     tokenizer_module = load_tokenizer(model_args)
     model = load_model(tokenizer_module["tokenizer"], model_args, finetuning_args, is_trainable=False)
 
-    base_model = AutoModelForCausalLM.from_pretrained(TINY_LLAMA, torch_dtype=model.dtype, device_map=model.device)
+    base_model = AutoModelForCausalLM.from_pretrained(
+        TINY_LLAMA, torch_dtype=torch.float16, device_map=get_current_device()
+    )
     ref_model: "LoraModel" = PeftModel.from_pretrained(base_model, TINY_LLAMA_ADAPTER)
     ref_model = ref_model.merge_and_unload()
     compare_model(model, ref_model)
-
-    for name, param in model.named_parameters():
-        assert param.requires_grad is False
-        assert param.dtype == torch.float16
-        assert "lora" not in name