diff --git a/Makefile b/Makefile
new file mode 100644
index 00000000..fec4443e
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,11 @@
+.PHONY: quality style
+
+check_dirs := src tests
+
+quality:
+	black --check $(check_dirs)
+	ruff $(check_dirs)
+
+style:
+	black $(check_dirs)
+	ruff $(check_dirs) --fix
diff --git a/pyproject.toml b/pyproject.toml
index 638dd9c5..a771b417 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,3 +1,37 @@
 [build-system]
 requires = ["setuptools>=61.0"]
 build-backend = "setuptools.build_meta"
+
+[tool.black]
+line-length = 119
+target-version = ["py38"]
+
+[tool.ruff]
+ignore = ["C901", "E501", "E741", "W605"]
+select = ["C", "E", "F", "I", "W"]
+line-length = 119
+
+[tool.ruff.isort]
+lines-after-imports = 2
+known-first-party = ["llmtuner"]
+
+[isort]
+default_section = "FIRSTPARTY"
+known_first_party = "llmtuner"
+known_third_party = [
+    "accelerate",
+    "datasets",
+    "gradio",
+    "numpy",
+    "peft",
+    "torch",
+    "transformers",
+    "trl"
+]
+line_length = 119
+lines_after_imports = 2
+multi_line_output = 3
+include_trailing_comma = true
+force_grid_wrap = 0
+use_parentheses = true
+ensure_newline_before_comments = true
diff --git a/src/llmtuner/data/loader.py b/src/llmtuner/data/loader.py
index 2ea00932..ea396c1f 100644
--- a/src/llmtuner/data/loader.py
+++ b/src/llmtuner/data/loader.py
@@ -135,9 +135,9 @@ def merge_dataset(
 
 
 def get_dataset(
+    tokenizer: "PreTrainedTokenizer",
     model_args: "ModelArguments",
     data_args: "DataArguments",
-    tokenizer: "PreTrainedTokenizer",
     training_args: "Seq2SeqTrainingArguments",
     stage: Literal["pt", "sft", "rm", "ppo"],
     # split: Optional[str] = "train", # TODO: add split
diff --git a/src/llmtuner/extras/patches/llama_patch.py b/src/llmtuner/extras/patches/llama_patch.py
index e7d9e48f..9e0bcdf5 100644
--- a/src/llmtuner/extras/patches/llama_patch.py
+++ b/src/llmtuner/extras/patches/llama_patch.py
@@ -130,6 +130,20 @@ def llama_flash_attn_forward(
 
     dropout_rate = self.attention_dropout if self.training else 0.0
 
+    input_dtype = query_states.dtype
+    if input_dtype == torch.float32:
+        if torch.is_autocast_enabled():
+            target_dtype = torch.get_autocast_gpu_dtype()
+        elif hasattr(self.config, "_pre_quantization_dtype"):
+            target_dtype = self.config._pre_quantization_dtype
+        else:
+            target_dtype = self.q_proj.weight.dtype
+
+        logger.warning_once("The input hidden states seems to be silently casted in float32.")
+        query_states = query_states.to(target_dtype)
+        key_states = key_states.to(target_dtype)
+        value_states = value_states.to(target_dtype)
+
     if getattr(self.config, "group_size_ratio", None) and self.training: # shift
         groupsz = int(q_len * getattr(self.config, "group_size_ratio"))
         assert q_len % groupsz == 0, "q_len {} should be divisible by group size {}.".format(q_len, groupsz)
diff --git a/src/llmtuner/train/dpo/workflow.py b/src/llmtuner/train/dpo/workflow.py
index bd61a308..3868d1d9 100644
--- a/src/llmtuner/train/dpo/workflow.py
+++ b/src/llmtuner/train/dpo/workflow.py
@@ -25,7 +25,7 @@ def run_dpo(
     callbacks: Optional[List["TrainerCallback"]] = None
 ):
     model, tokenizer = load_model_and_tokenizer(model_args, finetuning_args, training_args.do_train)
-    dataset = get_dataset(model_args, data_args, tokenizer, training_args, stage="rm")
+    dataset = get_dataset(tokenizer, model_args, data_args, training_args, stage="rm")
     data_collator = DPODataCollatorWithPadding(
         tokenizer=tokenizer,
         pad_to_multiple_of=8,
diff --git a/src/llmtuner/train/ppo/workflow.py b/src/llmtuner/train/ppo/workflow.py
index 7b0dcc53..19df2d3d 100644
--- a/src/llmtuner/train/ppo/workflow.py
+++ b/src/llmtuner/train/ppo/workflow.py
@@ -29,7 +29,7 @@ def run_ppo(
     callbacks: Optional[List["TrainerCallback"]] = None
 ):
     model, tokenizer = load_model_and_tokenizer(model_args, finetuning_args, training_args.do_train, add_valuehead=True)
-    dataset = get_dataset(model_args, data_args, tokenizer, training_args, stage="ppo")
+    dataset = get_dataset(tokenizer, model_args, data_args, training_args, stage="ppo")
     tokenizer.padding_side = "left" # use left-padding in generation while using right-padding in training
     data_collator = DataCollatorWithPadding(tokenizer=tokenizer)
 
diff --git a/src/llmtuner/train/pt/workflow.py b/src/llmtuner/train/pt/workflow.py
index 3b7267eb..f6186b25 100644
--- a/src/llmtuner/train/pt/workflow.py
+++ b/src/llmtuner/train/pt/workflow.py
@@ -22,7 +22,7 @@ def run_pt(
     callbacks: Optional[List["TrainerCallback"]] = None
 ):
     model, tokenizer = load_model_and_tokenizer(model_args, finetuning_args, training_args.do_train)
-    dataset = get_dataset(model_args, data_args, tokenizer, training_args, stage="pt")
+    dataset = get_dataset(tokenizer, model_args, data_args, training_args, stage="pt")
     data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)
 
     # Initialize our Trainer
diff --git a/src/llmtuner/train/rm/workflow.py b/src/llmtuner/train/rm/workflow.py
index e055e216..97c22f7d 100644
--- a/src/llmtuner/train/rm/workflow.py
+++ b/src/llmtuner/train/rm/workflow.py
@@ -26,7 +26,7 @@ def run_rm(
     callbacks: Optional[List["TrainerCallback"]] = None
 ):
     model, tokenizer = load_model_and_tokenizer(model_args, finetuning_args, training_args.do_train, add_valuehead=True)
-    dataset = get_dataset(model_args, data_args, tokenizer, training_args, stage="rm")
+    dataset = get_dataset(tokenizer, model_args, data_args, training_args, stage="rm")
     data_collator = PairwiseDataCollatorWithPadding(tokenizer, pad_to_multiple_of=8)
 
     # Update arguments
diff --git a/src/llmtuner/train/sft/workflow.py b/src/llmtuner/train/sft/workflow.py
index 6d3f34e8..50287c8f 100644
--- a/src/llmtuner/train/sft/workflow.py
+++ b/src/llmtuner/train/sft/workflow.py
@@ -27,7 +27,7 @@ def run_sft(
     callbacks: Optional[List["TrainerCallback"]] = None
 ):
     model, tokenizer = load_model_and_tokenizer(model_args, finetuning_args, training_args.do_train)
-    dataset = get_dataset(model_args, data_args, tokenizer, training_args, stage="sft")
+    dataset = get_dataset(tokenizer, model_args, data_args, training_args, stage="sft")
 
     if training_args.predict_with_generate:
         tokenizer.padding_side = "left" # use left-padding in generation
diff --git a/tests/cal_lr.py b/tests/cal_lr.py
index 0cbaa0b9..2e74e3f4 100644
--- a/tests/cal_lr.py
+++ b/tests/cal_lr.py
@@ -11,9 +11,10 @@ from typing import Optional
 from torch.utils.data import DataLoader
 from transformers import DataCollatorForSeq2Seq
 
-from llmtuner.data import get_dataset, preprocess_dataset
+from llmtuner.data import get_dataset
 from llmtuner.extras.constants import IGNORE_INDEX
-from llmtuner.model import get_train_args, load_model_and_tokenizer
+from llmtuner.hparams import get_train_args
+from llmtuner.model import load_model_and_tokenizer
 
 
 BASE_LR = 3e-4 # 1.5e-4 for 30B-70B models
@@ -26,7 +27,7 @@ def calculate_lr(
     cutoff_len: int, # i.e. maximum input length during training
     batch_size: int, # total batch size, namely (batch size * gradient accumulation * world size)
     is_mistral: bool, # mistral model uses a smaller learning rate,
-    dataset_dir: Optional[str] = "../data"
+    dataset_dir: Optional[str] = "data"
 ):
     model_args, data_args, training_args, finetuning_args, _ = get_train_args(dict(
         stage="sft",
@@ -37,9 +38,8 @@ def calculate_lr(
         cutoff_len=cutoff_len,
         output_dir="dummy_dir"
     ))
-    trainset = get_dataset(model_args, data_args)
     _, tokenizer = load_model_and_tokenizer(model_args, finetuning_args, is_trainable=False, add_valuehead=False)
-    trainset = preprocess_dataset(trainset, tokenizer, data_args, training_args, stage="sft")
+    trainset = get_dataset(tokenizer, model_args, data_args, training_args, stage="sft")
     data_collator = DataCollatorForSeq2Seq(tokenizer=tokenizer, label_pad_token_id=IGNORE_INDEX)
     dataloader = DataLoader(
         dataset=trainset, batch_size=batch_size, shuffle=True, collate_fn=data_collator, pin_memory=True
diff --git a/tests/loftq_init.py b/tests/loftq_init.py
index 32cb96e0..637b5c97 100644
--- a/tests/loftq_init.py
+++ b/tests/loftq_init.py
@@ -7,11 +7,15 @@ import os
 import fire
 import torch
 import torch.nn as nn
-from typing import Optional
+from typing import TYPE_CHECKING, Optional
 from transformers import AutoModelForCausalLM, AutoTokenizer
 from peft import LoftQConfig, LoraConfig, TaskType, get_peft_model
 
 
+if TYPE_CHECKING:
+    from transformers import PreTrainedModel
+
+
 class Shell(nn.Module):
 
     def __init__(self, weight: torch.Tensor, bias: Optional[torch.Tensor] = None):
@@ -42,7 +46,8 @@ def quantize_loftq(
     loftq_iter: Optional[int] = 1,
     lora_alpha: Optional[int] = None,
    lora_rank: Optional[int] = 16,
-    lora_target: Optional[str] = "q_proj,v_proj"
+    lora_target: Optional[str] = "q_proj,v_proj",
+    save_safetensors: Optional[bool] = False,
 ):
     tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, trust_remote_code=True)
     model = AutoModelForCausalLM.from_pretrained(model_name_or_path, trust_remote_code=True, torch_dtype="auto")
@@ -60,16 +65,16 @@ def quantize_loftq(
 
     # Init LoftQ model
     lora_model = get_peft_model(model, lora_config)
-    base_model = lora_model.get_base_model()
+    base_model: "PreTrainedModel" = lora_model.get_base_model()
 
     # Save LoftQ model
     setattr(lora_model.base_model.peft_config["default"], "base_model_name_or_path", save_dir)
     setattr(lora_model.base_model.peft_config["default"], "init_lora_weights", True)
-    lora_model.save_pretrained(os.path.join(save_dir, "adapters"))
+    lora_model.save_pretrained(os.path.join(save_dir, "adapters"), safe_serialization=save_safetensors)
 
     # Save base model
     unwrap_model(base_model)
-    base_model.save_pretrained(save_dir)
+    base_model.save_pretrained(save_dir, safe_serialization=save_safetensors)
     tokenizer.save_pretrained(save_dir)
 
 
diff --git a/tests/quantize.py b/tests/quantize.py
deleted file mode 100644
index 7b529671..00000000
--- a/tests/quantize.py
+++ /dev/null
@@ -1,49 +0,0 @@
-# coding=utf-8
-# Quantizes models with AutoGPTQ (https://github.com/PanQiWei/AutoGPTQ).
-# Usage: python quantize.py --input_dir path_to_llama_model --output_dir path_to_quant_model --data_file alpaca.json
-#        --max_length 1024 --max_samples 1024
-# dataset format: instruction (string), input (string), output (string), history (List[string])
-
-import fire
-from datasets import load_dataset
-from transformers import AutoTokenizer
-from auto_gptq import AutoGPTQForCausalLM, BaseQuantizeConfig
-
-
-def quantize(input_dir: str, output_dir: str, data_file: str, max_length: int, max_samples: int):
-    tokenizer = AutoTokenizer.from_pretrained(input_dir, use_fast=False, padding_side="left")
-
-    def format_example(examples):
-        prefix=("A chat between a curious user and an artificial intelligence assistant. "
-        "The assistant gives helpful, detailed, and polite answers to the user's questions.")
-        texts = []
-        for i in range(len(examples["instruction"])):
-            prompt = prefix + "\n"
-            if "history" in examples:
-                for user_query, bot_resp in examples["history"][i]:
-                    prompt += "Human: {}\nAssistant: {}\n".format(user_query, bot_resp)
-            prompt += "Human: {}\nAssistant: {}".format(
-                examples["instruction"][i] + "\n" + examples["input"][i], examples["output"][i]
-            )
-            texts.append(prompt)
-        return tokenizer(texts, truncation=True, max_length=max_length)
-
-    dataset = load_dataset("json", data_files=data_file)["train"]
-    column_names = list(dataset.column_names)
-    dataset = dataset.select(range(min(len(dataset), max_samples)))
-    dataset = dataset.map(format_example, batched=True, remove_columns=column_names)
-    dataset = dataset.shuffle()
-
-    quantize_config = BaseQuantizeConfig(
-        bits=4,
-        group_size=128,
-        desc_act=False
-    )
-
-    model = AutoGPTQForCausalLM.from_pretrained(input_dir, quantize_config, trust_remote_code=True)
-    model.quantize(dataset)
-    model.save_quantized(output_dir)
-
-
-if __name__ == "__main__":
-    fire.Fire(quantize)
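
For reference, a minimal sketch of the calling convention after this change, modeled on the updated tests/cal_lr.py above: get_dataset now takes the tokenizer as its first positional argument, and get_train_args is imported from llmtuner.hparams rather than llmtuner.model. The model path, dataset name, and template values below are placeholders for illustration, not part of the diff:

# Sketch of the post-change call order (placeholder model/dataset/template values).
from llmtuner.data import get_dataset
from llmtuner.hparams import get_train_args  # moved out of llmtuner.model in this change
from llmtuner.model import load_model_and_tokenizer

model_args, data_args, training_args, finetuning_args, _ = get_train_args(dict(
    stage="sft",
    model_name_or_path="path_to_model",  # placeholder
    dataset="alpaca_en",                 # placeholder
    template="default",                  # placeholder
    cutoff_len=1024,
    output_dir="dummy_dir",
))
model, tokenizer = load_model_and_tokenizer(model_args, finetuning_args, training_args.do_train)

# Old order: get_dataset(model_args, data_args, tokenizer, training_args, stage="sft")
# New order: the tokenizer comes first.
dataset = get_dataset(tokenizer, model_args, data_args, training_args, stage="sft")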