fix resize vocab at inference #3022

hiyouga 2024-04-03 18:14:24 +08:00
parent ce77d98872
commit 148bda353f
9 changed files with 31 additions and 40 deletions
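
This commit splits `load_model_and_tokenizer` into two functions, `load_tokenizer` and `load_model`, so that `get_template_and_fix_tokenizer` can run between the two steps: the template fix may register extra special tokens, and loading the model afterwards lets it resize its embedding matrix against the final vocabulary, which fixes vocab resizing at inference time. A minimal sketch of the new call order (hedged: the argument objects are built directly here for illustration, with a hypothetical checkpoint name; the real entry points construct them from parsed arguments):

    # Sketch of the call order this commit establishes.
    from llmtuner.data import get_template_and_fix_tokenizer
    from llmtuner.hparams import FinetuningArguments, ModelArguments
    from llmtuner.model import load_model, load_tokenizer

    model_args = ModelArguments(model_name_or_path="meta-llama/Llama-2-7b-hf")  # hypothetical checkpoint
    finetuning_args = FinetuningArguments()

    tokenizer = load_tokenizer(model_args)                      # 1. tokenizer first
    get_template_and_fix_tokenizer(tokenizer, "default")        # 2. may add special tokens
    model = load_model(tokenizer, model_args, finetuning_args)  # 3. model sees the final vocab size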


@@ -15,7 +15,7 @@ from transformers import DataCollatorForLanguageModeling, DataCollatorForSeq2Seq
 from llmtuner.data import get_dataset
 from llmtuner.extras.constants import IGNORE_INDEX
 from llmtuner.hparams import get_train_args
-from llmtuner.model import load_model_and_tokenizer
+from llmtuner.model import load_tokenizer


 BASE_LR = 3e-4  # 1.5e-4 for 30B-70B models
@@ -32,7 +32,7 @@ def calculate_lr(
     cutoff_len: Optional[int] = 1024,  # i.e. maximum input length during training
     is_mistral: Optional[bool] = False,  # mistral model uses a smaller learning rate,
 ):
-    model_args, data_args, training_args, finetuning_args, _ = get_train_args(
+    model_args, data_args, training_args, _, _ = get_train_args(
         dict(
             stage=stage,
             model_name_or_path=model_name_or_path,
@@ -44,8 +44,8 @@
             overwrite_cache=True,
         )
     )
-    _, tokenizer = load_model_and_tokenizer(model_args, finetuning_args, is_trainable=False, add_valuehead=False)
-    trainset = get_dataset(tokenizer, model_args, data_args, training_args, stage=stage)
+    tokenizer = load_tokenizer(model_args)
+    trainset = get_dataset(tokenizer, model_args, data_args, training_args, stage)
     if stage == "pt":
         data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)
     elif stage == "sft":


@@ -10,7 +10,7 @@ from tqdm import tqdm
 from llmtuner.data import get_dataset
 from llmtuner.hparams import get_train_args
-from llmtuner.model import load_model_and_tokenizer
+from llmtuner.model import load_tokenizer


 def length_cdf(
@@ -20,7 +20,7 @@ def length_cdf(
     template: Optional[str] = "default",
     interval: Optional[int] = 1000,
 ):
-    model_args, data_args, training_args, finetuning_args, _ = get_train_args(
+    model_args, data_args, training_args, _, _ = get_train_args(
         dict(
             stage="sft",
             model_name_or_path=model_name_or_path,
@@ -32,7 +32,7 @@
             overwrite_cache=True,
         )
     )
-    _, tokenizer = load_model_and_tokenizer(model_args, finetuning_args, is_trainable=False, add_valuehead=False)
+    tokenizer = load_tokenizer(model_args)
     trainset = get_dataset(tokenizer, model_args, data_args, training_args, stage="sft")
     total_num = len(trainset)
     length_dict = defaultdict(int)


@@ -20,7 +20,7 @@ def get_requires():
 extra_require = {
-    "deepspeed": ["deepspeed"],
+    "deepspeed": ["deepspeed>=0.10.0"],
     "metrics": ["nltk", "jieba", "rouge-chinese"],
     "unsloth": ["torch==2.2.0", "unsloth[cu121-ampere-torch220]"],
     "vllm": ["vllm>=0.3.3"],


@@ -9,7 +9,7 @@ from transformers import GenerationConfig, TextIteratorStreamer
 from ..data import get_template_and_fix_tokenizer
 from ..extras.misc import get_logits_processor
-from ..model import load_model_and_tokenizer
+from ..model import load_model, load_tokenizer
 from .base_engine import BaseEngine, Response
@@ -30,11 +30,12 @@ class HuggingfaceEngine(BaseEngine):
         generating_args: "GeneratingArguments",
     ) -> None:
         self.can_generate = finetuning_args.stage == "sft"
-        self.model, self.tokenizer = load_model_and_tokenizer(
-            model_args, finetuning_args, is_trainable=False, add_valuehead=(not self.can_generate)
-        )
+        self.tokenizer = load_tokenizer(model_args)
         self.tokenizer.padding_side = "left" if self.can_generate else "right"
         self.template = get_template_and_fix_tokenizer(self.tokenizer, data_args.template)
+        self.model = load_model(
+            self.tokenizer, model_args, finetuning_args, is_trainable=False, add_valuehead=(not self.can_generate)
+        )
         self.generating_args = generating_args.to_dict()

     @staticmethod


@@ -14,16 +14,17 @@ from transformers.utils import cached_file
 from ..data import get_template_and_fix_tokenizer
 from ..extras.constants import CHOICES, SUBJECTS
 from ..hparams import get_eval_args
-from ..model import load_model_and_tokenizer
+from ..model import load_model, load_tokenizer
 from .template import get_eval_template


 class Evaluator:
     def __init__(self, args: Optional[Dict[str, Any]] = None) -> None:
         self.model_args, self.data_args, self.eval_args, finetuning_args = get_eval_args(args)
-        self.model, self.tokenizer = load_model_and_tokenizer(self.model_args, finetuning_args)
+        self.tokenizer = load_tokenizer(self.model_args)
         self.tokenizer.padding_side = "right"  # avoid overflow issue in batched inference for llama2
         self.template = get_template_and_fix_tokenizer(self.tokenizer, self.data_args.template)
+        self.model = load_model(self.tokenizer, self.model_args, finetuning_args)
         self.eval_template = get_eval_template(self.eval_args.lang)
         self.choice_inputs = [
             self.tokenizer.encode(self.eval_template.prefix + ch, add_special_tokens=False)[-1] for ch in CHOICES


@@ -1,10 +1,9 @@
-from .loader import load_model, load_model_and_tokenizer, load_tokenizer
+from .loader import load_model, load_tokenizer
 from .utils import find_all_linear_modules, load_valuehead_params


 __all__ = [
     "load_model",
-    "load_model_and_tokenizer",
     "load_tokenizer",
     "load_valuehead_params",
     "find_all_linear_modules",


@@ -1,4 +1,4 @@
-from typing import TYPE_CHECKING, Any, Dict, Tuple
+from typing import TYPE_CHECKING, Any, Dict

 from transformers import AutoConfig, AutoModelForCausalLM, AutoTokenizer
 from trl import AutoModelForCausalLMWithValueHead
@@ -133,17 +133,3 @@ def load_model(
     )

     return model
-
-
-def load_model_and_tokenizer(
-    model_args: "ModelArguments",
-    finetuning_args: "FinetuningArguments",
-    is_trainable: bool = False,
-    add_valuehead: bool = False,
-) -> Tuple["PreTrainedModel", "PreTrainedTokenizer"]:
-    r"""
-    Loads pretrained model and tokenizer.
-    """
-    tokenizer = load_tokenizer(model_args)
-    model = load_model(tokenizer, model_args, finetuning_args, is_trainable, add_valuehead)
-    return model, tokenizer
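
The removed one-shot helper left no room to fix the tokenizer between the two loads. What the split enables is growing the embedding matrix to match a tokenizer whose vocabulary changed after loading. A hedged sketch of that resize using the standard transformers API (the actual patch logic inside `load_model` is not shown in this diff):

    # Only safe once the tokenizer has its final vocabulary.
    if len(tokenizer) > model.get_input_embeddings().weight.size(0):
        model.resize_token_embeddings(len(tokenizer))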


@@ -7,7 +7,7 @@ from ..data import get_template_and_fix_tokenizer
 from ..extras.callbacks import LogCallback
 from ..extras.logging import get_logger
 from ..hparams import get_infer_args, get_train_args
-from ..model import load_model_and_tokenizer
+from ..model import load_model, load_tokenizer
 from .dpo import run_dpo
 from .orpo import run_orpo
 from .ppo import run_ppo
@@ -52,8 +52,9 @@ def export_model(args: Optional[Dict[str, Any]] = None):
     if model_args.adapter_name_or_path is not None and model_args.export_quantization_bit is not None:
         raise ValueError("Please merge adapters before quantizing the model.")

-    model, tokenizer = load_model_and_tokenizer(model_args, finetuning_args)
+    tokenizer = load_tokenizer(model_args)
     get_template_and_fix_tokenizer(tokenizer, data_args.template)
+    model = load_model(tokenizer, model_args, finetuning_args)  # must after fixing tokenizer to resize vocab

     if getattr(model, "quantization_method", None) and model_args.adapter_name_or_path is not None:
         raise ValueError("Cannot merge adapters to a quantized model.")


@@ -10,7 +10,7 @@ from transformers.utils.versions import require_version
 from ..extras.logging import get_logger
 from ..extras.packages import is_galore_available
 from ..hparams import FinetuningArguments, ModelArguments
-from ..model import find_all_linear_modules, load_model_and_tokenizer, load_valuehead_params
+from ..model import find_all_linear_modules, load_model, load_tokenizer, load_valuehead_params


 if is_galore_available():
@@ -87,16 +87,18 @@ def create_ref_model(
         )
         ref_model_args = ModelArguments(**ref_model_args_dict)
         ref_finetuning_args = FinetuningArguments(finetuning_type="lora")
-        ref_model, _ = load_model_and_tokenizer(
-            ref_model_args, ref_finetuning_args, is_trainable=False, add_valuehead=add_valuehead
+        tokenizer = load_tokenizer(ref_model_args)
+        ref_model = load_model(
+            tokenizer, ref_model_args, ref_finetuning_args, is_trainable=False, add_valuehead=add_valuehead
         )
         logger.info("Created reference model from {}".format(finetuning_args.ref_model))
     else:
         if finetuning_args.finetuning_type == "lora":
             ref_model = None
         else:
-            ref_model, _ = load_model_and_tokenizer(
-                model_args, finetuning_args, is_trainable=False, add_valuehead=add_valuehead
+            tokenizer = load_tokenizer(model_args)
+            ref_model = load_model(
+                tokenizer, model_args, finetuning_args, is_trainable=False, add_valuehead=add_valuehead
             )
             logger.info("Created reference model from the model itself.")
@@ -141,8 +143,9 @@ def create_reward_model(
         )
         reward_model_args = ModelArguments(**reward_model_args_dict)
         reward_finetuning_args = FinetuningArguments(finetuning_type="lora")
-        reward_model, _ = load_model_and_tokenizer(
-            reward_model_args, reward_finetuning_args, is_trainable=False, add_valuehead=True
+        tokenizer = load_tokenizer(reward_model_args)
+        reward_model = load_model(
+            tokenizer, reward_model_args, reward_finetuning_args, is_trainable=False, add_valuehead=True
         )
         logger.info("Loaded full weights of reward model from {}".format(finetuning_args.reward_model))
         logger.warning("Please ensure the ppo model and reward model share SAME tokenizer and vocabulary.")