fix tests

hiyouga 2024-01-20 19:58:04 +08:00
parent 38af076a75
commit f6d6e00337
12 changed files with 80 additions and 65 deletions

Makefile (new file)

@@ -0,0 +1,11 @@
+.PHONY: quality style
+
+check_dirs := src tests
+
+quality:
+	black --check $(check_dirs)
+	ruff $(check_dirs)
+
+style:
+	black $(check_dirs)
+	ruff $(check_dirs) --fix

pyproject.toml

@@ -1,3 +1,37 @@
 [build-system]
 requires = ["setuptools>=61.0"]
 build-backend = "setuptools.build_meta"
+
+[tool.black]
+line-length = 119
+target-version = ["py38"]
+
+[tool.ruff]
+ignore = ["C901", "E501", "E741", "W605"]
+select = ["C", "E", "F", "I", "W"]
+line-length = 119
+
+[tool.ruff.isort]
+lines-after-imports = 2
+known-first-party = ["llmtuner"]
+
+[isort]
+default_section = "FIRSTPARTY"
+known_first_party = "llmtuner"
+known_third_party = [
+    "accelerate",
+    "datasets",
+    "gradio",
+    "numpy",
+    "peft",
+    "torch",
+    "transformers",
+    "trl"
+]
+line_length = 119
+lines_after_imports = 2
+multi_line_output = 3
+include_trailing_comma = true
+force_grid_wrap = 0
+use_parentheses = true
+ensure_newline_before_comments = true

Data loader (get_dataset)

@@ -135,9 +135,9 @@ def merge_dataset(
 def get_dataset(
+    tokenizer: "PreTrainedTokenizer",
     model_args: "ModelArguments",
     data_args: "DataArguments",
-    tokenizer: "PreTrainedTokenizer",
     training_args: "Seq2SeqTrainingArguments",
     stage: Literal["pt", "sft", "rm", "ppo"],
     # split: Optional[str] = "train", # TODO: add split
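
A minimal call-site sketch of the reordered signature (the argument objects are assumed to be built by get_train_args, as in the workflow modules below; values are placeholders):

    from llmtuner.data import get_dataset

    # the tokenizer is now the first positional argument, followed by the *_args objects
    dataset = get_dataset(tokenizer, model_args, data_args, training_args, stage="sft")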

LLaMA FlashAttention patch (llama_flash_attn_forward)

@@ -130,6 +130,20 @@ def llama_flash_attn_forward(
     dropout_rate = self.attention_dropout if self.training else 0.0

+    input_dtype = query_states.dtype
+    if input_dtype == torch.float32:
+        if torch.is_autocast_enabled():
+            target_dtype = torch.get_autocast_gpu_dtype()
+        elif hasattr(self.config, "_pre_quantization_dtype"):
+            target_dtype = self.config._pre_quantization_dtype
+        else:
+            target_dtype = self.q_proj.weight.dtype
+
+        logger.warning_once("The input hidden states seems to be silently casted in float32.")
+        query_states = query_states.to(target_dtype)
+        key_states = key_states.to(target_dtype)
+        value_states = value_states.to(target_dtype)
+
     if getattr(self.config, "group_size_ratio", None) and self.training:  # shift
         groupsz = int(q_len * getattr(self.config, "group_size_ratio"))
         assert q_len % groupsz == 0, "q_len {} should be divisible by group size {}.".format(q_len, groupsz)
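
To make the cast logic above easier to follow, here is a minimal standalone sketch of the same dtype-selection order (the helper name and arguments are illustrative; in the patch the checks run inline on self.config and self.q_proj):

    import torch

    def pick_flash_attn_dtype(config, proj_weight: torch.Tensor) -> torch.dtype:
        # FlashAttention kernels expect fp16/bf16 inputs, so float32 hidden states
        # are cast to a target dtype chosen in this order:
        if torch.is_autocast_enabled():
            return torch.get_autocast_gpu_dtype()  # dtype configured by autocast
        if hasattr(config, "_pre_quantization_dtype"):
            return config._pre_quantization_dtype  # dtype recorded before quantization
        return proj_weight.dtype  # fall back to the projection weight's dtype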

DPO training workflow (run_dpo)

@@ -25,7 +25,7 @@ def run_dpo(
     callbacks: Optional[List["TrainerCallback"]] = None
 ):
     model, tokenizer = load_model_and_tokenizer(model_args, finetuning_args, training_args.do_train)
-    dataset = get_dataset(model_args, data_args, tokenizer, training_args, stage="rm")
+    dataset = get_dataset(tokenizer, model_args, data_args, training_args, stage="rm")
     data_collator = DPODataCollatorWithPadding(
         tokenizer=tokenizer,
         pad_to_multiple_of=8,

PPO training workflow (run_ppo)

@@ -29,7 +29,7 @@ def run_ppo(
     callbacks: Optional[List["TrainerCallback"]] = None
 ):
     model, tokenizer = load_model_and_tokenizer(model_args, finetuning_args, training_args.do_train, add_valuehead=True)
-    dataset = get_dataset(model_args, data_args, tokenizer, training_args, stage="ppo")
+    dataset = get_dataset(tokenizer, model_args, data_args, training_args, stage="ppo")
     tokenizer.padding_side = "left"  # use left-padding in generation while using right-padding in training
     data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

Pre-training workflow (run_pt)

@@ -22,7 +22,7 @@ def run_pt(
     callbacks: Optional[List["TrainerCallback"]] = None
 ):
     model, tokenizer = load_model_and_tokenizer(model_args, finetuning_args, training_args.do_train)
-    dataset = get_dataset(model_args, data_args, tokenizer, training_args, stage="pt")
+    dataset = get_dataset(tokenizer, model_args, data_args, training_args, stage="pt")
     data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)

     # Initialize our Trainer

Reward modeling workflow (run_rm)

@@ -26,7 +26,7 @@ def run_rm(
     callbacks: Optional[List["TrainerCallback"]] = None
 ):
     model, tokenizer = load_model_and_tokenizer(model_args, finetuning_args, training_args.do_train, add_valuehead=True)
-    dataset = get_dataset(model_args, data_args, tokenizer, training_args, stage="rm")
+    dataset = get_dataset(tokenizer, model_args, data_args, training_args, stage="rm")
     data_collator = PairwiseDataCollatorWithPadding(tokenizer, pad_to_multiple_of=8)

     # Update arguments

SFT workflow (run_sft)

@@ -27,7 +27,7 @@ def run_sft(
     callbacks: Optional[List["TrainerCallback"]] = None
 ):
     model, tokenizer = load_model_and_tokenizer(model_args, finetuning_args, training_args.do_train)
-    dataset = get_dataset(model_args, data_args, tokenizer, training_args, stage="sft")
+    dataset = get_dataset(tokenizer, model_args, data_args, training_args, stage="sft")

     if training_args.predict_with_generate:
         tokenizer.padding_side = "left"  # use left-padding in generation

Learning-rate estimation script (calculate_lr)

@@ -11,9 +11,10 @@ from typing import Optional
 from torch.utils.data import DataLoader
 from transformers import DataCollatorForSeq2Seq

-from llmtuner.data import get_dataset, preprocess_dataset
+from llmtuner.data import get_dataset
 from llmtuner.extras.constants import IGNORE_INDEX
-from llmtuner.model import get_train_args, load_model_and_tokenizer
+from llmtuner.hparams import get_train_args
+from llmtuner.model import load_model_and_tokenizer


 BASE_LR = 3e-4  # 1.5e-4 for 30B-70B models
@@ -26,7 +27,7 @@ def calculate_lr(
     cutoff_len: int,  # i.e. maximum input length during training
     batch_size: int,  # total batch size, namely (batch size * gradient accumulation * world size)
     is_mistral: bool,  # mistral model uses a smaller learning rate,
-    dataset_dir: Optional[str] = "../data"
+    dataset_dir: Optional[str] = "data"
 ):
     model_args, data_args, training_args, finetuning_args, _ = get_train_args(dict(
         stage="sft",
@@ -37,9 +38,8 @@ def calculate_lr(
         cutoff_len=cutoff_len,
         output_dir="dummy_dir"
     ))
-    trainset = get_dataset(model_args, data_args)
     _, tokenizer = load_model_and_tokenizer(model_args, finetuning_args, is_trainable=False, add_valuehead=False)
-    trainset = preprocess_dataset(trainset, tokenizer, data_args, training_args, stage="sft")
+    trainset = get_dataset(tokenizer, model_args, data_args, training_args, stage="sft")
     data_collator = DataCollatorForSeq2Seq(tokenizer=tokenizer, label_pad_token_id=IGNORE_INDEX)
     dataloader = DataLoader(
         dataset=trainset, batch_size=batch_size, shuffle=True, collate_fn=data_collator, pin_memory=True
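
A minimal usage sketch for the updated defaults (a direct call with placeholder values; any additional required parameters of calculate_lr, such as the model path, are omitted here):

    calculate_lr(
        cutoff_len=1024,     # maximum input length during training
        batch_size=128,      # batch size * gradient accumulation * world size
        is_mistral=False,
        dataset_dir="data",  # default changed from "../data" to "data"
    )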

LoftQ initialization script (quantize_loftq)

@@ -7,11 +7,15 @@ import os
 import fire
 import torch
 import torch.nn as nn
-from typing import Optional
+from typing import TYPE_CHECKING, Optional
 from transformers import AutoModelForCausalLM, AutoTokenizer
 from peft import LoftQConfig, LoraConfig, TaskType, get_peft_model

+if TYPE_CHECKING:
+    from transformers import PreTrainedModel
+

 class Shell(nn.Module):

     def __init__(self, weight: torch.Tensor, bias: Optional[torch.Tensor] = None):
@@ -42,7 +46,8 @@ def quantize_loftq(
     loftq_iter: Optional[int] = 1,
     lora_alpha: Optional[int] = None,
     lora_rank: Optional[int] = 16,
-    lora_target: Optional[str] = "q_proj,v_proj"
+    lora_target: Optional[str] = "q_proj,v_proj",
+    save_safetensors: Optional[bool] = False,
 ):
     tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, trust_remote_code=True)
     model = AutoModelForCausalLM.from_pretrained(model_name_or_path, trust_remote_code=True, torch_dtype="auto")
@@ -60,16 +65,16 @@ def quantize_loftq(
     # Init LoftQ model
     lora_model = get_peft_model(model, lora_config)
-    base_model = lora_model.get_base_model()
+    base_model: "PreTrainedModel" = lora_model.get_base_model()

     # Save LoftQ model
     setattr(lora_model.base_model.peft_config["default"], "base_model_name_or_path", save_dir)
     setattr(lora_model.base_model.peft_config["default"], "init_lora_weights", True)
-    lora_model.save_pretrained(os.path.join(save_dir, "adapters"))
+    lora_model.save_pretrained(os.path.join(save_dir, "adapters"), safe_serialization=save_safetensors)

     # Save base model
     unwrap_model(base_model)
-    base_model.save_pretrained(save_dir)
+    base_model.save_pretrained(save_dir, safe_serialization=save_safetensors)
     tokenizer.save_pretrained(save_dir)
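
A usage sketch for the new flag (a hypothetical call with placeholder paths; the save_dir parameter is assumed from its use inside the function, the remaining names come from the signature above):

    quantize_loftq(
        model_name_or_path="path_to_llama_model",
        save_dir="path_to_loftq_model",
        lora_rank=16,
        lora_target="q_proj,v_proj",
        save_safetensors=True,  # new: write the adapters and the base model as safetensors
    )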

quantize.py (deleted file)

@@ -1,49 +0,0 @@
-# coding=utf-8
-# Quantizes models with AutoGPTQ (https://github.com/PanQiWei/AutoGPTQ).
-# Usage: python quantize.py --input_dir path_to_llama_model --output_dir path_to_quant_model --data_file alpaca.json
-#        --max_length 1024 --max_samples 1024
-# dataset format: instruction (string), input (string), output (string), history (List[string])
-
-import fire
-from datasets import load_dataset
-from transformers import AutoTokenizer
-from auto_gptq import AutoGPTQForCausalLM, BaseQuantizeConfig
-
-
-def quantize(input_dir: str, output_dir: str, data_file: str, max_length: int, max_samples: int):
-    tokenizer = AutoTokenizer.from_pretrained(input_dir, use_fast=False, padding_side="left")
-
-    def format_example(examples):
-        prefix=("A chat between a curious user and an artificial intelligence assistant. "
-                "The assistant gives helpful, detailed, and polite answers to the user's questions.")
-        texts = []
-        for i in range(len(examples["instruction"])):
-            prompt = prefix + "\n"
-            if "history" in examples:
-                for user_query, bot_resp in examples["history"][i]:
-                    prompt += "Human: {}\nAssistant: {}\n".format(user_query, bot_resp)
-            prompt += "Human: {}\nAssistant: {}".format(
-                examples["instruction"][i] + "\n" + examples["input"][i], examples["output"][i]
-            )
-            texts.append(prompt)
-        return tokenizer(texts, truncation=True, max_length=max_length)
-
-    dataset = load_dataset("json", data_files=data_file)["train"]
-    column_names = list(dataset.column_names)
-    dataset = dataset.select(range(min(len(dataset), max_samples)))
-    dataset = dataset.map(format_example, batched=True, remove_columns=column_names)
-    dataset = dataset.shuffle()
-
-    quantize_config = BaseQuantizeConfig(
-        bits=4,
-        group_size=128,
-        desc_act=False
-    )
-
-    model = AutoGPTQForCausalLM.from_pretrained(input_dir, quantize_config, trust_remote_code=True)
-    model.quantize(dataset)
-    model.save_quantized(output_dir)
-
-
-if __name__ == "__main__":
-    fire.Fire(quantize)