Merge branch 'hiyouga:main' into main

commit 7007fbc0b6, committed by GitHub on 2023-07-21 17:00:26 +08:00
GPG Key ID: 4AEE18F83AFDEB23 (no known key found for this signature in database)
6 changed files with 15 additions and 12 deletions

View File

@@ -1,8 +1,8 @@
 torch>=1.13.1
 transformers>=4.29.1
 datasets>=2.12.0
-accelerate>=0.19.0
-peft>=0.3.0
+accelerate>=0.21.0
+peft>=0.4.0
 trl>=0.4.7
 sentencepiece
 jieba

View File

@@ -1,4 +1,4 @@
 from llmtuner.chat import ChatModel

-__version__ = "0.1.2"
+__version__ = "0.1.3"

View File

@@ -1,6 +1,6 @@
 import os
 import torch
-from typing import Dict
+from typing import Dict, Optional
 from transformers.trainer import WEIGHTS_NAME, WEIGHTS_INDEX_NAME
 from transformers.modeling_utils import load_sharded_checkpoint
@@ -12,12 +12,12 @@ from llmtuner.extras.logging import get_logger

 logger = get_logger(__name__)

-def get_state_dict(model: torch.nn.Module) -> Dict[str, torch.Tensor]: # get state dict containing trainable parameters
+def get_state_dict(model: torch.nn.Module, trainable_only: Optional[bool] = True) -> Dict[str, torch.Tensor]:
     state_dict = model.state_dict()
     filtered_state_dict = {}
     for k, v in model.named_parameters():
-        if v.requires_grad:
+        if (not trainable_only) or v.requires_grad:
             filtered_state_dict[k] = state_dict[k].cpu().clone().detach()
     return filtered_state_dict
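
For illustration, here is how the updated helper behaves. A minimal runnable sketch with the function inlined; the toy two-layer model and the frozen first layer are assumptions for the example, not code from the repo:

import torch
from typing import Dict, Optional

# Copy of the updated helper above, inlined for a standalone example.
def get_state_dict(model: torch.nn.Module, trainable_only: Optional[bool] = True) -> Dict[str, torch.Tensor]:
    state_dict = model.state_dict()
    filtered_state_dict = {}
    for k, v in model.named_parameters():
        if (not trainable_only) or v.requires_grad:
            filtered_state_dict[k] = state_dict[k].cpu().clone().detach()
    return filtered_state_dict

# Toy model: freeze the first layer to mimic an adapter-style setup
# where only part of the network is trainable.
model = torch.nn.Sequential(torch.nn.Linear(4, 4), torch.nn.Linear(4, 2))
for p in model[0].parameters():
    p.requires_grad = False

print(sorted(get_state_dict(model)))                        # ['1.bias', '1.weight']
print(sorted(get_state_dict(model, trainable_only=False)))  # all four parameter names

With the default trainable_only=True the old behavior is preserved; passing False returns every parameter, which is what full fine-tuning needs when saving.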

View File

@@ -27,8 +27,8 @@ logger = get_logger(__name__)
 check_min_version("4.29.1")
 require_version("datasets>=2.12.0", "To fix: pip install datasets>=2.12.0")
-require_version("accelerate>=0.19.0", "To fix: pip install accelerate>=0.19.0")
-require_version("peft>=0.3.0", "To fix: pip install peft>=0.3.0")
+require_version("accelerate>=0.21.0", "To fix: pip install accelerate>=0.21.0")
+require_version("peft>=0.4.0", "To fix: pip install peft>=0.4.0")
 require_version("trl>=0.4.7", "To fix: pip install trl>=0.4.7")
@@ -81,9 +81,6 @@ def load_model_and_tokenizer(
     elif model_args.quantization_bit == 4:
         require_version("bitsandbytes>=0.39.0", "To fix: pip install bitsandbytes>=0.39.0")
-        require_version("transformers>=4.30.1", "To fix: pip install transformers>=4.30.1")
-        require_version("accelerate>=0.20.3", "To fix: pip install accelerate>=0.20.3")
-        require_version("peft>=0.4.0.dev0", "To fix: pip install git+https://github.com/huggingface/peft.git")
         config_kwargs["load_in_4bit"] = True
         config_kwargs["quantization_config"] = BitsAndBytesConfig(
             load_in_4bit=True,
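
The hunk is truncated inside the BitsAndBytesConfig call. For context, a minimal sketch of what a complete 4-bit load looks like with the transformers API; the model name and the specific quantization options are illustrative assumptions, not values from this commit:

import torch
from transformers import AutoModelForCausalLM, BitsAndBytesConfig

# Illustrative settings; not taken from this commit.
quant_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
)

model = AutoModelForCausalLM.from_pretrained(
    "huggyllama/llama-7b",  # hypothetical model name
    quantization_config=quant_config,
    device_map="auto",
)

The three removed require_version calls presumably became redundant once the base requirements were raised to stable releases of accelerate and peft that support the 4-bit path.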

View File

@@ -56,7 +56,7 @@ class PeftTrainer(Seq2SeqTrainer):
             backbone_model.config.use_cache = True
             backbone_model.save_pretrained(
                 output_dir,
-                state_dict=get_state_dict(backbone_model),
+                state_dict=get_state_dict(backbone_model, trainable_only=(self.finetuning_args.finetuning_type != "full")),
                 safe_serialization=self.args.save_safetensors
             )
             backbone_model.config.use_cache = False
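
This wires the new flag into saving: full fine-tuning exports every weight, while adapter-style modes keep the old filtered behavior. A toy sketch of the decision; the single Linear layer and frozen bias are illustrative stand-ins:

import torch

model = torch.nn.Linear(4, 2)
model.bias.requires_grad = False  # pretend part of the model is frozen

# Mirrors trainable_only=(finetuning_type != "full") from the hunk above.
for finetuning_type in ("full", "lora"):
    trainable_only = finetuning_type != "full"
    kept = [k for k, v in model.named_parameters()
            if (not trainable_only) or v.requires_grad]
    print(finetuning_type, "->", kept)
# full -> ['weight', 'bias'], lora -> ['weight']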

View File

@@ -84,6 +84,12 @@ class WebChatModel(ChatModel):
             query, history, prefix, max_new_tokens=max_new_tokens, top_p=top_p, temperature=temperature
         ):
             response += new_text
+            response = self.postprocess(response)
             new_history = history + [(query, response)]
             chatbot[-1] = [query, response]
             yield chatbot, new_history
+
+    def postprocess(self, response: str) -> str:
+        response = response.replace("<", "&lt;")
+        response = response.replace(">", "&gt;")
+        return response
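
The new postprocess step escapes angle brackets so that model output such as special tokens or markup renders literally in the web chatbot instead of being interpreted as HTML. A quick standalone check of the behavior:

def postprocess(response: str) -> str:
    response = response.replace("<", "&lt;")
    response = response.replace(">", "&gt;")
    return response

print(postprocess("use a <b>bold</b> tag"))
# use a &lt;b&gt;bold&lt;/b&gt; tag

Note that Python's standard html.escape would additionally escape "&" and quotes; the helper here touches only angle brackets, so the escaping stays idempotent as the streamed response is re-processed on each iteration.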