diff --git a/pyproject.toml b/pyproject.toml index a771b417..44a6e497 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -7,7 +7,7 @@ line-length = 119 target-version = ["py38"] [tool.ruff] -ignore = ["C901", "E501", "E741", "W605"] +ignore = ["C408", "C901", "E501", "E731", "E741", "W605"] select = ["C", "E", "F", "I", "W"] line-length = 119 diff --git a/src/api_demo.py b/src/api_demo.py index 20350881..a7140675 100644 --- a/src/api_demo.py +++ b/src/api_demo.py @@ -1,4 +1,5 @@ import os + import uvicorn from llmtuner import ChatModel, create_app diff --git a/src/cli_demo.py b/src/cli_demo.py index 781aa551..39704153 100644 --- a/src/cli_demo.py +++ b/src/cli_demo.py @@ -1,10 +1,12 @@ from llmtuner import ChatModel from llmtuner.extras.misc import torch_gc + try: import platform + if platform.system() != "Windows": - import readline + import readline # noqa: F401 except ImportError: print("Install `readline` for a better experience.") diff --git a/src/llmtuner/__init__.py b/src/llmtuner/__init__.py index bdbd5af2..512b3fbc 100644 --- a/src/llmtuner/__init__.py +++ b/src/llmtuner/__init__.py @@ -8,12 +8,4 @@ from llmtuner.webui import create_ui, create_web_demo __version__ = "0.4.0" -__all__ = [ - "create_app", - "ChatModel", - "Evaluator", - "export_model", - "run_exp", - "create_ui", - "create_web_demo" -] +__all__ = ["create_app", "ChatModel", "Evaluator", "export_model", "run_exp", "create_ui", "create_web_demo"] diff --git a/src/llmtuner/api/app.py b/src/llmtuner/api/app.py index f8115227..d50e9137 100644 --- a/src/llmtuner/api/app.py +++ b/src/llmtuner/api/app.py @@ -1,30 +1,29 @@ -import os -import json import asyncio -from typing import List, Tuple -from pydantic import BaseModel +import json +import os from contextlib import asynccontextmanager +from typing import List, Tuple +from pydantic import BaseModel + +from ..chat import ChatModel +from ..extras.misc import torch_gc +from ..extras.packages import is_fastapi_availble, is_starlette_available, is_uvicorn_available from .protocol import ( - Role, - Finish, - ModelCard, - ModelList, - ChatMessage, - DeltaMessage, ChatCompletionRequest, ChatCompletionResponse, - ChatCompletionStreamResponse, ChatCompletionResponseChoice, ChatCompletionResponseStreamChoice, ChatCompletionResponseUsage, + ChatCompletionStreamResponse, + ChatMessage, + DeltaMessage, + Finish, + ModelCard, + ModelList, + Role, ScoreEvaluationRequest, - ScoreEvaluationResponse -) -from ..chat import ChatModel -from ..extras.misc import torch_gc -from ..extras.packages import ( - is_fastapi_availble, is_starlette_available, is_uvicorn_available + ScoreEvaluationResponse, ) @@ -42,15 +41,15 @@ if is_uvicorn_available(): @asynccontextmanager -async def lifespan(app: "FastAPI"): # collects GPU memory +async def lifespan(app: "FastAPI"): # collects GPU memory yield torch_gc() def to_json(data: BaseModel) -> str: - try: # pydantic v2 + try: # pydantic v2 return json.dumps(data.model_dump(exclude_unset=True), ensure_ascii=False) - except: # pydantic v1 + except Exception: # pydantic v1 return data.json(exclude_unset=True, ensure_ascii=False) @@ -90,8 +89,8 @@ def create_app(chat_model: "ChatModel") -> "FastAPI": history = [] if len(prev_messages) % 2 == 0: for i in range(0, len(prev_messages), 2): - if prev_messages[i].role == Role.USER and prev_messages[i+1].role == Role.ASSISTANT: - history.append([prev_messages[i].content, prev_messages[i+1].content]) + if prev_messages[i].role == Role.USER and prev_messages[i + 1].role == Role.ASSISTANT: + 
history.append([prev_messages[i].content, prev_messages[i + 1].content]) else: raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="Only supports u/a/u/a/u...") else: @@ -107,65 +106,65 @@ def create_app(chat_model: "ChatModel") -> "FastAPI": return EventSourceResponse(generate, media_type="text/event-stream") responses = chat_model.chat( - query, history, system, + query, + history, + system, do_sample=request.do_sample, temperature=request.temperature, top_p=request.top_p, max_new_tokens=request.max_tokens, - num_return_sequences=request.n + num_return_sequences=request.n, ) prompt_length, response_length = 0, 0 choices = [] for i, response in enumerate(responses): - choices.append(ChatCompletionResponseChoice( - index=i, - message=ChatMessage(role=Role.ASSISTANT, content=response.response_text), - finish_reason=Finish.STOP if response.finish_reason == "stop" else Finish.LENGTH - )) + choices.append( + ChatCompletionResponseChoice( + index=i, + message=ChatMessage(role=Role.ASSISTANT, content=response.response_text), + finish_reason=Finish.STOP if response.finish_reason == "stop" else Finish.LENGTH, + ) + ) prompt_length = response.prompt_length response_length += response.response_length usage = ChatCompletionResponseUsage( prompt_tokens=prompt_length, completion_tokens=response_length, - total_tokens=prompt_length+response_length + total_tokens=prompt_length + response_length, ) return ChatCompletionResponse(model=request.model, choices=choices, usage=usage) - def stream_chat_completion(query: str, history: List[Tuple[str, str]], system: str, request: ChatCompletionRequest): + def stream_chat_completion( + query: str, history: List[Tuple[str, str]], system: str, request: ChatCompletionRequest + ): choice_data = ChatCompletionResponseStreamChoice( - index=0, - delta=DeltaMessage(role=Role.ASSISTANT, content=""), - finish_reason=None + index=0, delta=DeltaMessage(role=Role.ASSISTANT, content=""), finish_reason=None ) chunk = ChatCompletionStreamResponse(model=request.model, choices=[choice_data]) yield to_json(chunk) for new_text in chat_model.stream_chat( - query, history, system, + query, + history, + system, do_sample=request.do_sample, temperature=request.temperature, top_p=request.top_p, - max_new_tokens=request.max_tokens + max_new_tokens=request.max_tokens, ): if len(new_text) == 0: continue choice_data = ChatCompletionResponseStreamChoice( - index=0, - delta=DeltaMessage(content=new_text), - finish_reason=None + index=0, delta=DeltaMessage(content=new_text), finish_reason=None ) chunk = ChatCompletionStreamResponse(model=request.model, choices=[choice_data]) yield to_json(chunk) - choice_data = ChatCompletionResponseStreamChoice( - index=0, - delta=DeltaMessage(), - finish_reason=Finish.STOP - ) + choice_data = ChatCompletionResponseStreamChoice(index=0, delta=DeltaMessage(), finish_reason=Finish.STOP) chunk = ChatCompletionStreamResponse(model=request.model, choices=[choice_data]) yield to_json(chunk) yield "[DONE]" diff --git a/src/llmtuner/api/protocol.py b/src/llmtuner/api/protocol.py index 42569f84..862f8b77 100644 --- a/src/llmtuner/api/protocol.py +++ b/src/llmtuner/api/protocol.py @@ -1,8 +1,9 @@ import time from enum import Enum, unique -from pydantic import BaseModel, Field from typing import List, Optional +from pydantic import BaseModel, Field + @unique class Role(str, Enum): diff --git a/src/llmtuner/chat/chat_model.py b/src/llmtuner/chat/chat_model.py index 88846dee..f6cbbddc 100644 --- a/src/llmtuner/chat/chat_model.py +++ 
b/src/llmtuner/chat/chat_model.py @@ -1,18 +1,18 @@ -import torch from dataclasses import dataclass -from typing import Any, Dict, Generator, List, Literal, Optional, Tuple from threading import Thread +from typing import Any, Dict, Generator, List, Literal, Optional, Tuple + +import torch from transformers import GenerationConfig, TextIteratorStreamer -from ..data import get_template_and_fix_tokenizer, Role +from ..data import Role, get_template_and_fix_tokenizer from ..extras.misc import get_logits_processor -from ..model import dispatch_model, load_model_and_tokenizer from ..hparams import get_infer_args +from ..model import dispatch_model, load_model_and_tokenizer @dataclass class Response: - response_text: str response_length: int prompt_length: int @@ -20,10 +20,9 @@ class Response: class ChatModel: - def __init__(self, args: Optional[Dict[str, Any]] = None) -> None: model_args, data_args, finetuning_args, self.generating_args = get_infer_args(args) - self.can_generate = (finetuning_args.stage == "sft") + self.can_generate = finetuning_args.stage == "sft" self.model, self.tokenizer = load_model_and_tokenizer( model_args, finetuning_args, is_trainable=False, add_valuehead=(not self.can_generate) ) @@ -37,7 +36,7 @@ class ChatModel: history: Optional[List[Tuple[str, str]]] = None, system: Optional[str] = None, tools: Optional[str] = None, - **input_kwargs + **input_kwargs, ) -> Tuple[Dict[str, Any], int]: messages = [] if history is not None: @@ -63,16 +62,18 @@ class ChatModel: max_new_tokens = input_kwargs.pop("max_new_tokens", None) generating_args = self.generating_args.to_dict() - generating_args.update(dict( - do_sample=do_sample if do_sample is not None else generating_args["do_sample"], - temperature=temperature or generating_args["temperature"], - top_p=top_p or generating_args["top_p"], - top_k=top_k or generating_args["top_k"], - num_return_sequences=num_return_sequences or 1, - repetition_penalty=repetition_penalty or generating_args["repetition_penalty"], - eos_token_id=[self.tokenizer.eos_token_id] + self.tokenizer.additional_special_tokens_ids, - pad_token_id=self.tokenizer.pad_token_id - )) + generating_args.update( + dict( + do_sample=do_sample if do_sample is not None else generating_args["do_sample"], + temperature=temperature or generating_args["temperature"], + top_p=top_p or generating_args["top_p"], + top_k=top_k or generating_args["top_k"], + num_return_sequences=num_return_sequences or 1, + repetition_penalty=repetition_penalty or generating_args["repetition_penalty"], + eos_token_id=[self.tokenizer.eos_token_id] + self.tokenizer.additional_special_tokens_ids, + pad_token_id=self.tokenizer.pad_token_id, + ) + ) if isinstance(num_return_sequences, int) and num_return_sequences > 1: generating_args["do_sample"] = True @@ -88,7 +89,7 @@ class ChatModel: gen_kwargs = dict( inputs=input_ids, generation_config=GenerationConfig(**generating_args), - logits_processor=get_logits_processor() + logits_processor=get_logits_processor(), ) return gen_kwargs, prompt_length @@ -100,7 +101,7 @@ class ChatModel: history: Optional[List[Tuple[str, str]]] = None, system: Optional[str] = None, tools: Optional[str] = None, - **input_kwargs + **input_kwargs, ) -> List[Response]: r""" Args: query, history, system, **input_kwargs @@ -117,12 +118,14 @@ class ChatModel: for i in range(len(response)): eos_index = (response_ids[i] == self.tokenizer.eos_token_id).nonzero() response_length = (eos_index[0].item() + 1) if len(eos_index) else len(response_ids[i]) - results.append(Response( - 
response_text=response[i], - response_length=response_length, - prompt_length=prompt_length, - finish_reason="stop" if len(eos_index) else "length" - )) + results.append( + Response( + response_text=response[i], + response_length=response_length, + prompt_length=prompt_length, + finish_reason="stop" if len(eos_index) else "length", + ) + ) return results @@ -133,7 +136,7 @@ class ChatModel: history: Optional[List[Tuple[str, str]]] = None, system: Optional[str] = None, tools: Optional[str] = None, - **input_kwargs + **input_kwargs, ) -> Generator[str, None, None]: gen_kwargs, _ = self._process_args(query, history, system, tools, **input_kwargs) streamer = TextIteratorStreamer(self.tokenizer, timeout=60.0, skip_prompt=True, skip_special_tokens=True) @@ -145,11 +148,7 @@ class ChatModel: yield from streamer @torch.inference_mode() - def get_scores( - self, - batch_input: List[str], - **input_kwargs - ) -> List[float]: + def get_scores(self, batch_input: List[str], **input_kwargs) -> List[float]: max_length = input_kwargs.pop("max_length", None) device = getattr(self.model.pretrained_model, "device", "cuda") @@ -159,7 +158,7 @@ class ChatModel: truncation=True, max_length=max_length or getattr(self.model.config, "max_position_embeddings", 1024), return_tensors="pt", - add_special_tokens=True + add_special_tokens=True, ).to(device) input_ids: torch.Tensor = inputs["input_ids"] diff --git a/src/llmtuner/data/__init__.py b/src/llmtuner/data/__init__.py index 85be70b7..c941faca 100644 --- a/src/llmtuner/data/__init__.py +++ b/src/llmtuner/data/__init__.py @@ -1,6 +1,6 @@ from .loader import get_dataset from .template import get_template_and_fix_tokenizer, templates -from .utils import split_dataset, Role +from .utils import Role, split_dataset __all__ = ["get_dataset", "get_template_and_fix_tokenizer", "templates", "split_dataset", "Role"] diff --git a/src/llmtuner/data/aligner.py b/src/llmtuner/data/aligner.py index 7c27f837..b8149589 100644 --- a/src/llmtuner/data/aligner.py +++ b/src/llmtuner/data/aligner.py @@ -27,7 +27,9 @@ def convert_alpaca(examples: Dict[str, List[Any]], dataset_attr: "DatasetAttr") if dataset_attr.response: if isinstance(examples[dataset_attr.response][i], list): - response = [{"role": Role.ASSISTANT, "content": content} for content in examples[dataset_attr.response][i]] + response = [ + {"role": Role.ASSISTANT, "content": content} for content in examples[dataset_attr.response][i] + ] else: response = [{"role": Role.ASSISTANT, "content": examples[dataset_attr.response][i]}] else: @@ -47,10 +49,10 @@ def convert_sharegpt(examples: Dict[str, List[Any]], dataset_attr: "DatasetAttr" dataset_attr.user_tag: Role.USER, dataset_attr.assistant_tag: Role.ASSISTANT, dataset_attr.observation_tag: Role.OBSERVATION, - dataset_attr.function_tag: Role.FUNCTION + dataset_attr.function_tag: Role.FUNCTION, } for i, messages in enumerate(examples[dataset_attr.messages]): - messages = messages[:len(messages) // 2 * 2] # should be multiples of 2 + messages = messages[: len(messages) // 2 * 2] # should be multiples of 2 if len(messages) == 0: continue @@ -65,7 +67,9 @@ def convert_sharegpt(examples: Dict[str, List[Any]], dataset_attr: "DatasetAttr" if message[dataset_attr.role_tag] not in accept_tags: raise ValueError("Invalid role tag in {}.".format(messages)) - prompt.append({"role": tag_mapping[message[dataset_attr.role_tag]], "content": message[dataset_attr.content_tag]}) + prompt.append( + {"role": tag_mapping[message[dataset_attr.role_tag]], "content": message[dataset_attr.content_tag]} + 
) last_message = prompt.pop(-1) response.append(last_message) @@ -98,12 +102,7 @@ def align_dataset( kwargs = dict( num_proc=data_args.preprocessing_num_workers, load_from_cache_file=(not data_args.overwrite_cache), - desc="Converting format of dataset" + desc="Converting format of dataset", ) - return dataset.map( - convert_func, - batched=True, - remove_columns=column_names, - **kwargs - ) + return dataset.map(convert_func, batched=True, remove_columns=column_names, **kwargs) diff --git a/src/llmtuner/data/formatter.py b/src/llmtuner/data/formatter.py index f1c35308..6451d8e3 100644 --- a/src/llmtuner/data/formatter.py +++ b/src/llmtuner/data/formatter.py @@ -76,7 +76,11 @@ class ToolFormatter: required = ", required" if name in tool["parameters"].get("required", []) else "" enum = ", should be one of [{}]".format(", ".join(param["enum"])) if param.get("enum", None) else "" param_text += " - {name} ({type}{required}): {desc}{enum}\n".format( - name=name, type=param.get("type", ""), required=required, desc=param.get("description", ""), enum=enum + name=name, + type=param.get("type", ""), + required=required, + desc=param.get("description", ""), + enum=enum, ) tool_text += "> Tool Name: {name}\nTool Description: {desc}\nTool Args:\n{args}\n".format( @@ -85,9 +89,7 @@ class ToolFormatter: tool_names.append(tool["name"]) return TOOL_SYSTEM_PROMPT.format( - tool_text=tool_text, - tool_names=", ".join(tool_names), - format_prompt=JSON_FORMAT_PROMPT + tool_text=tool_text, tool_names=", ".join(tool_names), format_prompt=JSON_FORMAT_PROMPT ) def __call__(self, content: str) -> List[Union[str, Dict[str, str]]]: diff --git a/src/llmtuner/data/loader.py b/src/llmtuner/data/loader.py index ea396c1f..c4b86e38 100644 --- a/src/llmtuner/data/loader.py +++ b/src/llmtuner/data/loader.py @@ -1,16 +1,16 @@ -import os import inspect +import os from typing import TYPE_CHECKING, List, Literal, Union from datasets import concatenate_datasets, interleave_datasets, load_dataset, load_from_disk from ..extras.constants import FILEEXT2TYPE from ..extras.logging import get_logger -from .utils import checksum -from .parser import get_dataset_list from .aligner import align_dataset -from .template import get_template_and_fix_tokenizer +from .parser import get_dataset_list from .preprocess import get_preprocess_and_print_func +from .template import get_template_and_fix_tokenizer +from .utils import checksum if TYPE_CHECKING: @@ -18,8 +18,8 @@ if TYPE_CHECKING: from transformers import Seq2SeqTrainingArguments from transformers.tokenization_utils import PreTrainedTokenizer + from ..hparams import DataArguments, ModelArguments from .parser import DatasetAttr - from ..hparams import ModelArguments, DataArguments logger = get_logger(__name__) @@ -44,14 +44,14 @@ def load_single_dataset( elif dataset_attr.load_from == "file": data_files = [] local_path: str = os.path.join(data_args.dataset_dir, dataset_attr.dataset_name) - if os.path.isdir(local_path): # is directory + if os.path.isdir(local_path): # is directory for file_name in os.listdir(local_path): data_files.append(os.path.join(local_path, file_name)) if data_path is None: data_path = FILEEXT2TYPE.get(file_name.split(".")[-1], None) elif data_path != FILEEXT2TYPE.get(file_name.split(".")[-1], None): raise ValueError("File types should be identical.") - elif os.path.isfile(local_path): # is file + elif os.path.isfile(local_path): # is file data_files.append(local_path) data_path = FILEEXT2TYPE.get(local_path.split(".")[-1], None) else: @@ -78,12 +78,12 @@ def 
load_single_dataset( split=data_args.split, cache_dir=cache_dir, token=model_args.ms_hub_token, - use_streaming=(data_args.streaming and (dataset_attr.load_from != "file")) + use_streaming=(data_args.streaming and (dataset_attr.load_from != "file")), ).to_hf_dataset() except ImportError: raise ImportError("Please install modelscope via `pip install modelscope -U`") else: - if "trust_remote_code" in inspect.signature(load_dataset).parameters: # for datasets==2.16.0 + if "trust_remote_code" in inspect.signature(load_dataset).parameters: # for datasets==2.16.0 kwargs = {"trust_remote_code": True} else: kwargs = {} @@ -97,13 +97,13 @@ def load_single_dataset( cache_dir=model_args.cache_dir, token=model_args.hf_hub_token, streaming=(data_args.streaming and (dataset_attr.load_from != "file")), - **kwargs + **kwargs, ) - if data_args.streaming and (dataset_attr.load_from == "file"): # faster than specifying streaming=True - dataset = dataset.to_iterable_dataset() # TODO: add num shards parameter + if data_args.streaming and (dataset_attr.load_from == "file"): # faster than specifying streaming=True + dataset = dataset.to_iterable_dataset() # TODO: add num shards parameter - if data_args.max_samples is not None: # truncate dataset + if data_args.max_samples is not None: # truncate dataset num_samples = min(data_args.max_samples, len(dataset)) dataset = dataset.select(range(num_samples)) @@ -113,7 +113,7 @@ def load_single_dataset( def merge_dataset( all_datasets: List[Union["Dataset", "IterableDataset"]], data_args: "DataArguments", - training_args: "Seq2SeqTrainingArguments" + training_args: "Seq2SeqTrainingArguments", ) -> Union["Dataset", "IterableDataset"]: if len(all_datasets) == 1: return all_datasets[0] @@ -128,7 +128,7 @@ def merge_dataset( datasets=all_datasets, probabilities=data_args.interleave_probs, seed=training_args.seed, - stopping_strategy="first_exhausted" if data_args.mix_strategy.endswith("under") else "all_exhausted" + stopping_strategy="first_exhausted" if data_args.mix_strategy.endswith("under") else "all_exhausted", ) else: raise ValueError("Unknown mixing strategy.") @@ -160,7 +160,7 @@ def get_dataset( with training_args.main_process_first(desc="load dataset"): all_datasets = [] - for dataset_attr in get_dataset_list(data_args): # TODO: add split + for dataset_attr in get_dataset_list(data_args): # TODO: add split all_datasets.append(load_single_dataset(dataset_attr, model_args, data_args)) dataset = merge_dataset(all_datasets, data_args, training_args) @@ -174,15 +174,10 @@ def get_dataset( kwargs = dict( num_proc=data_args.preprocessing_num_workers, load_from_cache_file=(not data_args.overwrite_cache), - desc="Running tokenizer on dataset" + desc="Running tokenizer on dataset", ) - dataset = dataset.map( - preprocess_func, - batched=True, - remove_columns=column_names, - **kwargs - ) + dataset = dataset.map(preprocess_func, batched=True, remove_columns=column_names, **kwargs) if data_args.cache_path is not None and not os.path.exists(data_args.cache_path): if training_args.should_save: diff --git a/src/llmtuner/data/parser.py b/src/llmtuner/data/parser.py index 9da5b732..474aad83 100644 --- a/src/llmtuner/data/parser.py +++ b/src/llmtuner/data/parser.py @@ -1,18 +1,18 @@ -import os import json -from typing import TYPE_CHECKING, List, Literal, Optional +import os from dataclasses import dataclass +from typing import TYPE_CHECKING, List, Literal, Optional from ..extras.constants import DATA_CONFIG from ..extras.misc import use_modelscope + if TYPE_CHECKING: from ..hparams 
import DataArguments @dataclass class DatasetAttr: - load_from: Literal["hf_hub", "ms_hub", "script", "file"] dataset_name: Optional[str] = None dataset_sha1: Optional[str] = None @@ -49,7 +49,9 @@ def get_dataset_list(data_args: "DataArguments") -> List["DatasetAttr"]: dataset_info = json.load(f) except Exception as err: if data_args.dataset is not None: - raise ValueError("Cannot open {} due to {}.".format(os.path.join(data_args.dataset_dir, DATA_CONFIG), str(err))) + raise ValueError( + "Cannot open {} due to {}.".format(os.path.join(data_args.dataset_dir, DATA_CONFIG), str(err)) + ) dataset_info = None if data_args.interleave_probs is not None: @@ -74,7 +76,7 @@ def get_dataset_list(data_args: "DataArguments") -> List["DatasetAttr"]: dataset_attr = DatasetAttr( "file", dataset_name=dataset_info[name]["file_name"], - dataset_sha1=dataset_info[name].get("file_sha1", None) + dataset_sha1=dataset_info[name].get("file_sha1", None), ) dataset_attr.subset = dataset_info[name].get("subset", None) diff --git a/src/llmtuner/data/preprocess.py b/src/llmtuner/data/preprocess.py index 81caeda7..c4f182a3 100644 --- a/src/llmtuner/data/preprocess.py +++ b/src/llmtuner/data/preprocess.py @@ -5,6 +5,7 @@ from typing import TYPE_CHECKING, Any, Callable, Dict, List, Literal, Tuple from ..extras.constants import IGNORE_INDEX from ..extras.logging import get_logger + if TYPE_CHECKING: from transformers import Seq2SeqTrainingArguments from transformers.tokenization_utils import PreTrainedTokenizer @@ -17,9 +18,7 @@ logger = get_logger(__name__) def preprocess_pretrain_dataset( - examples: Dict[str, List[Any]], - tokenizer: "PreTrainedTokenizer", - data_args: "DataArguments" + examples: Dict[str, List[Any]], tokenizer: "PreTrainedTokenizer", data_args: "DataArguments" ) -> Dict[str, List[List[int]]]: # build grouped texts with format `X1 X2 X3 ...` text_examples = [examples["prompt"][i][0]["content"] for i in range(len(examples["prompt"]))] @@ -35,7 +34,7 @@ def preprocess_pretrain_dataset( total_length = (total_length // block_size) * block_size # split by chunks of cutoff_len result = { - k: [t[i: i + block_size] for i in range(0, total_length, block_size)] + k: [t[i : i + block_size] for i in range(0, total_length, block_size)] for k, t in concatenated_examples.items() } return result @@ -57,9 +56,11 @@ def preprocess_supervised_dataset( messages = examples["prompt"][i] + examples["response"][i] input_ids, labels = [], [] - for turn_idx, (source_ids, target_ids) in enumerate(template.encode_multiturn( - tokenizer, messages, examples["system"][i], examples["tools"][i], data_args.cutoff_len - )): + for turn_idx, (source_ids, target_ids) in enumerate( + template.encode_multiturn( + tokenizer, messages, examples["system"][i], examples["tools"][i], data_args.cutoff_len + ) + ): if data_args.train_on_prompt: source_mask = source_ids elif turn_idx != 0 and template.efficient_eos: @@ -96,9 +97,9 @@ def preprocess_packed_supervised_dataset( continue messages = examples["prompt"][i] + examples["response"][i] - for turn_idx, (source_ids, target_ids) in enumerate(template.encode_multiturn( - tokenizer, messages, examples["system"][i], examples["tools"][i] - )): + for turn_idx, (source_ids, target_ids) in enumerate( + template.encode_multiturn(tokenizer, messages, examples["system"][i], examples["tools"][i]) + ): if data_args.train_on_prompt: source_mask = source_ids elif turn_idx != 0 and template.efficient_eos: @@ -119,9 +120,9 @@ def preprocess_packed_supervised_dataset( total_length = (total_length // block_size) 
* block_size # split by chunks of cutoff_len for i in range(0, total_length, block_size): - model_inputs["input_ids"].append(input_ids[i: i + block_size]) + model_inputs["input_ids"].append(input_ids[i : i + block_size]) model_inputs["attention_mask"].append([1] * block_size) - model_inputs["labels"].append(labels[i: i + block_size]) + model_inputs["labels"].append(labels[i : i + block_size]) return model_inputs @@ -191,9 +192,11 @@ def print_supervised_dataset_example(example: Dict[str, List[int]], tokenizer: " print("input_ids:\n{}".format(example["input_ids"])) print("inputs:\n{}".format(tokenizer.decode(example["input_ids"], skip_special_tokens=False))) print("label_ids:\n{}".format(example["labels"])) - print("labels:\n{}".format( - tokenizer.decode(list(filter(lambda x: x != IGNORE_INDEX, example["labels"])), skip_special_tokens=False) - )) + print( + "labels:\n{}".format( + tokenizer.decode(list(filter(lambda x: x != IGNORE_INDEX, example["labels"])), skip_special_tokens=False) + ) + ) def print_pairwise_dataset_example(example: Dict[str, List[int]], tokenizer: "PreTrainedTokenizer") -> None: @@ -232,10 +235,14 @@ def get_preprocess_and_print_func( print_function = partial(print_supervised_dataset_example, tokenizer=tokenizer) elif stage == "rm": - preprocess_func = partial(preprocess_pairwise_dataset, tokenizer=tokenizer, template=template, data_args=data_args) + preprocess_func = partial( + preprocess_pairwise_dataset, tokenizer=tokenizer, template=template, data_args=data_args + ) print_function = partial(print_pairwise_dataset_example, tokenizer=tokenizer) else: - preprocess_func = partial(preprocess_unsupervised_dataset, tokenizer=tokenizer, template=template, data_args=data_args) + preprocess_func = partial( + preprocess_unsupervised_dataset, tokenizer=tokenizer, template=template, data_args=data_args + ) print_function = partial(print_unsupervised_dataset_example, tokenizer=tokenizer) return preprocess_func, print_function diff --git a/src/llmtuner/data/template.py b/src/llmtuner/data/template.py index 253edbc5..924007bd 100644 --- a/src/llmtuner/data/template.py +++ b/src/llmtuner/data/template.py @@ -2,8 +2,8 @@ from dataclasses import dataclass from typing import TYPE_CHECKING, Callable, Dict, List, Optional, Tuple, Union from ..extras.logging import get_logger +from .formatter import FunctionFormatter, StringFormatter, ToolFormatter from .utils import Role -from .formatter import StringFormatter, FunctionFormatter, ToolFormatter if TYPE_CHECKING: @@ -15,7 +15,6 @@ logger = get_logger(__name__) @dataclass class Template: - format_user: Callable format_assistant: Callable format_system: Callable @@ -34,7 +33,7 @@ class Template: messages: List[Dict[str, str]], system: Optional[str] = None, tools: Optional[str] = None, - cutoff_len: Optional[int] = 1_000_000 + cutoff_len: Optional[int] = 1_000_000, ) -> Tuple[List[int], List[int]]: r""" Returns a single pair of token ids representing prompt and response respectively. @@ -53,7 +52,7 @@ class Template: messages: List[Dict[str, str]], system: str, tools: str, - cutoff_len: Optional[int] = 1_000_000 + cutoff_len: Optional[int] = 1_000_000, ) -> List[Tuple[List[int], List[int]]]: r""" Returns multiple pairs of token ids representing prompts and responses respectively. @@ -67,7 +66,7 @@ class Template: messages: List[Dict[str, str]], system: str, tools: str, - cutoff_len: int + cutoff_len: int, ) -> List[Tuple[List[int], List[int]]]: r""" Encodes formatted inputs to pairs of token ids. 
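The next hunk only reformats the pairing-and-truncation loop at the end of `Template._encode` (and the identical loop in `Llama2Template._encode`). As a rough, standalone sketch of what that loop does — the helper name and the plain-list types here are illustrative, not llmtuner code:

```python
from typing import List, Tuple


def pair_and_truncate(
    encoded_messages: List[List[int]], cutoff_len: int = 1_000_000
) -> List[Tuple[List[int], List[int]]]:
    """Pair consecutive (prompt, response) token-id lists and trim them to cutoff_len.

    Mirrors the tail of Template._encode as shown in the surrounding hunks; assumes
    an even number of messages (user/assistant alternation).
    """
    encoded_pairs = []
    total_length = 0
    for i in range(0, len(encoded_messages), 2):
        if total_length >= cutoff_len:
            break

        # truncate the prompt side first, then give the response side at least one token
        encoded_messages[i] = encoded_messages[i][: cutoff_len - total_length]
        total_length += len(encoded_messages[i])

        encoded_messages[i + 1] = encoded_messages[i + 1][: max(1, cutoff_len - total_length)]
        total_length += len(encoded_messages[i + 1])

        encoded_pairs.append((encoded_messages[i], encoded_messages[i + 1]))
    return encoded_pairs


# e.g. pair_and_truncate([[1, 2, 3], [4, 5], [6, 7, 8, 9], [10]], cutoff_len=6)
# -> [([1, 2, 3], [4, 5]), ([6], [10])]
```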
@@ -102,19 +101,17 @@ class Template: if total_length >= cutoff_len: break - encoded_messages[i] = encoded_messages[i][:cutoff_len-total_length] + encoded_messages[i] = encoded_messages[i][: cutoff_len - total_length] total_length += len(encoded_messages[i]) - encoded_messages[i+1] = encoded_messages[i+1][:max(1, cutoff_len-total_length)] - total_length += len(encoded_messages[i+1]) - encoded_pairs.append((encoded_messages[i], encoded_messages[i+1])) + encoded_messages[i + 1] = encoded_messages[i + 1][: max(1, cutoff_len - total_length)] + total_length += len(encoded_messages[i + 1]) + encoded_pairs.append((encoded_messages[i], encoded_messages[i + 1])) return encoded_pairs def _convert_elements_to_ids( - self, - tokenizer: "PreTrainedTokenizer", - elements: List[Union[str, Dict[str, str]]] + self, tokenizer: "PreTrainedTokenizer", elements: List[Union[str, Dict[str, str]]] ) -> List[int]: r""" Converts elements to token ids. @@ -139,14 +136,13 @@ class Template: @dataclass class Llama2Template(Template): - def _encode( self, tokenizer: "PreTrainedTokenizer", messages: List[Dict[str, str]], system: str, tools: str, - cutoff_len: int + cutoff_len: int, ) -> List[Tuple[List[int], List[int]]]: r""" Encodes formatted inputs to pairs of token ids. @@ -182,12 +178,12 @@ class Llama2Template(Template): if total_length >= cutoff_len: break - encoded_messages[i] = encoded_messages[i][:cutoff_len-total_length] + encoded_messages[i] = encoded_messages[i][: cutoff_len - total_length] total_length += len(encoded_messages[i]) - encoded_messages[i+1] = encoded_messages[i+1][:max(1, cutoff_len-total_length)] - total_length += len(encoded_messages[i+1]) - encoded_pairs.append((encoded_messages[i], encoded_messages[i+1])) + encoded_messages[i + 1] = encoded_messages[i + 1][: max(1, cutoff_len - total_length)] + total_length += len(encoded_messages[i + 1]) + encoded_pairs.append((encoded_messages[i], encoded_messages[i + 1])) return encoded_pairs @@ -207,32 +203,26 @@ def register_template( separator: Optional[List[Union[str, Dict[str, str]]]] = "", stop_words: Optional[List[str]] = [], efficient_eos: Optional[bool] = False, - replace_eos: Optional[bool] = False + replace_eos: Optional[bool] = False, ) -> None: template_class = Llama2Template if name.startswith("llama2") else Template templates[name] = template_class( format_user=format_user or StringFormatter(container=["{{content}}"]), - format_assistant=format_assistant or StringFormatter(container=[ - "{{content}}", {"eos_token"} - ]), + format_assistant=format_assistant or StringFormatter(container=["{{content}}", {"eos_token"}]), format_system=format_system or StringFormatter(container=["{{content}}"]), format_tool=format_tool or ToolFormatter(type="default"), format_observation=format_observation or format_user, - format_function=format_function or FunctionFormatter(container=[ - "Action: {{name}}\nAction Input: {{arguments}}", {"eos_token"} - ]), + format_function=format_function + or FunctionFormatter(container=["Action: {{name}}\nAction Input: {{arguments}}", {"eos_token"}]), system=system, separator=separator, stop_words=stop_words, efficient_eos=efficient_eos, - replace_eos=replace_eos + replace_eos=replace_eos, ) -def get_template_and_fix_tokenizer( - name: str, - tokenizer: "PreTrainedTokenizer" -) -> Template: +def get_template_and_fix_tokenizer(name: str, tokenizer: "PreTrainedTokenizer") -> Template: if tokenizer.eos_token_id is None: tokenizer.eos_token = "<|endoftext|>" logger.info("Add eos token: {}".format(tokenizer.eos_token)) @@ -241,7 
+231,7 @@ def get_template_and_fix_tokenizer( tokenizer.pad_token = tokenizer.eos_token logger.info("Add pad token: {}".format(tokenizer.pad_token)) - if name is None: # for pre-training + if name is None: # for pre-training return None template = templates.get(name, None) @@ -258,8 +248,7 @@ def get_template_and_fix_tokenizer( if stop_words: tokenizer.add_special_tokens( - dict(additional_special_tokens=stop_words), - replace_additional_special_tokens=False + dict(additional_special_tokens=stop_words), replace_additional_special_tokens=False ) logger.info("Add {} to stop words.".format(",".join(stop_words))) @@ -268,263 +257,153 @@ register_template( name="alpaca", - format_user=StringFormatter(container=[ - "### Instruction:\n{{content}}\n\n### Response:\n" - ]), + format_user=StringFormatter(container=["### Instruction:\n{{content}}\n\n### Response:\n"]), system=( - "Below is an instruction that describes a task. " - "Write a response that appropriately completes the request." + "Below is an instruction that describes a task. " "Write a response that appropriately completes the request." ), - separator=[ - "\n\n" - ] + separator=["\n\n"], ) register_template( name="aquila", - format_user=StringFormatter(container=[ - "Human: {{content}}###Assistant:" - ]), - format_assistant=StringFormatter(container=[ - "{{content}}" - ]), + format_user=StringFormatter(container=["Human: {{content}}###Assistant:"]), + format_assistant=StringFormatter(container=["{{content}}"]), system=( "A chat between a curious human and an artificial intelligence assistant. " "The assistant gives helpful, detailed, and polite answers to the human's questions." ), - separator=[ - "###" - ], - stop_words=[ - "</s>" - ], - efficient_eos=True + separator=["###"], + stop_words=["</s>"], + efficient_eos=True, ) register_template( name="baichuan", - format_user=StringFormatter(container=[ - {"token": "<reserved_102>"}, - "{{content}}", - {"token": "<reserved_106>"} - ]), - format_assistant=StringFormatter(container=[ - "{{content}}" - ]), - efficient_eos=True + format_user=StringFormatter(container=[{"token": "<reserved_102>"}, "{{content}}", {"token": "<reserved_106>"}]), + format_assistant=StringFormatter(container=["{{content}}"]), + efficient_eos=True, ) register_template( name="baichuan2", - format_user=StringFormatter(container=[ - {"token": "<reserved_106>"}, - "{{content}}", - {"token": "<reserved_107>"} - ]), - format_assistant=StringFormatter(container=[ - "{{content}}" - ]), - efficient_eos=True + format_user=StringFormatter(container=[{"token": "<reserved_106>"}, "{{content}}", {"token": "<reserved_107>"}]), + format_assistant=StringFormatter(container=["{{content}}"]), + efficient_eos=True, ) register_template( - name="belle", - format_user=StringFormatter(container=[ - "Human: {{content}}\n\nBelle: " - ]), - separator=[ - "\n\n" - ] + name="belle", format_user=StringFormatter(container=["Human: {{content}}\n\nBelle: "]), separator=["\n\n"] ) register_template( name="bluelm", - format_user=StringFormatter(container=[ - {"token": "[|Human|]:"}, - "{{content}}", - {"token": "[|AI|]:"} - ]) + format_user=StringFormatter(container=[{"token": "[|Human|]:"}, "{{content}}", {"token": "[|AI|]:"}]), ) register_template( name="chatglm2", - format_user=StringFormatter(container=[ - "[Round {{idx}}]\n\n问:{{content}}\n\n答:" - ]), - format_assistant=StringFormatter(container=[ - "{{content}}" - ]), - format_system=StringFormatter(container=[ - {"token": "[gMASK]"}, - {"token": "sop"}, - "{{content}}" - ]), - separator=[ - "\n\n" - ], - efficient_eos=True + format_user=StringFormatter(container=["[Round 
{{idx}}]\n\n问:{{content}}\n\n答:"]), + format_assistant=StringFormatter(container=["{{content}}"]), + format_system=StringFormatter(container=[{"token": "[gMASK]"}, {"token": "sop"}, "{{content}}"]), + separator=["\n\n"], + efficient_eos=True, ) register_template( name="chatglm3", - format_user=StringFormatter(container=[ - {"token": "<|user|>"}, - "\n", - "{{content}}", - {"token": "<|assistant|>"} - ]), - format_assistant=StringFormatter(container=[ - "\n" - "{{content}}" - ]), - format_system=StringFormatter(container=[ - {"token": "[gMASK]"}, - {"token": "sop"}, - {"token": "<|system|>"}, - "\n", - "{{content}}" - ]), - format_observation=StringFormatter(container=[ - {"token": "<|observation|>"}, - "\n", - "{{content}}" - ]), - format_function=FunctionFormatter(container=[ - "{{name}}\n{{arguments}}" - ]), + format_user=StringFormatter(container=[{"token": "<|user|>"}, "\n", "{{content}}", {"token": "<|assistant|>"}]), + format_assistant=StringFormatter(container=["\n" "{{content}}"]), + format_system=StringFormatter( + container=[{"token": "[gMASK]"}, {"token": "sop"}, {"token": "<|system|>"}, "\n", "{{content}}"] + ), + format_observation=StringFormatter(container=[{"token": "<|observation|>"}, "\n", "{{content}}"]), + format_function=FunctionFormatter(container=["{{name}}\n{{arguments}}"]), system=( "You are ChatGLM3, a large language model trained by Zhipu.AI. " "Follow the user's instructions carefully. Respond using markdown." ), - stop_words=[ - "<|user|>", - "<|observation|>" - ], - efficient_eos=True + stop_words=["<|user|>", "<|observation|>"], + efficient_eos=True, ) register_template( - name="codegeex2", - format_system=StringFormatter(container=[ - {"token": "[gMASK]"}, - {"token": "sop"}, - "{{content}}" - ]) + name="codegeex2", format_system=StringFormatter(container=[{"token": "[gMASK]"}, {"token": "sop"}, "{{content}}"]) ) -register_template( - name="deepseek", - format_user=StringFormatter(container=[ - "User: {{content}}\n\nAssistant:" - ]) -) +register_template(name="deepseek", format_user=StringFormatter(container=["User: {{content}}\n\nAssistant:"])) register_template( name="deepseekcoder", - format_user=StringFormatter(container=[ - "### Instruction:\n{{content}}\n### Response:\n" - ]), - format_assistant=StringFormatter(container=[ - "{{content}}" - ]), + format_user=StringFormatter(container=["### Instruction:\n{{content}}\n### Response:\n"]), + format_assistant=StringFormatter(container=["{{content}}"]), system=( "You are an AI programming assistant, utilizing the Deepseek Coder model, " "developed by Deepseek Company, and you only answer questions related to computer science. " "For politically sensitive questions, security and privacy issues, " "and other non-computer science questions, you will refuse to answer\n" ), - separator=[ - "\n", - {"token": "<|EOT|>"}, - "\n" - ], - stop_words=[ - "<|EOT|>" - ], - efficient_eos=True + separator=["\n", {"token": "<|EOT|>"}, "\n"], + stop_words=["<|EOT|>"], + efficient_eos=True, ) register_template( name="default", - format_user=StringFormatter(container=[ - "Human: {{content}}\nAssistant: " - ]), + format_user=StringFormatter(container=["Human: {{content}}\nAssistant: "]), system=( "A chat between a curious user and an artificial intelligence assistant. 
" "The assistant gives helpful, detailed, and polite answers to the user's questions.\n" ), - separator=[ - "\n" - ] + separator=["\n"], ) register_template( name="falcon", - format_user=StringFormatter(container=[ - "User: {{content}}\nFalcon:" - ]), - format_assistant=StringFormatter(container=[ - "{{content}}" - ]), - separator=[ - "\n" - ], - efficient_eos=True + format_user=StringFormatter(container=["User: {{content}}\nFalcon:"]), + format_assistant=StringFormatter(container=["{{content}}"]), + separator=["\n"], + efficient_eos=True, ) register_template( name="intern", - format_user=StringFormatter(container=[ - "<|User|>:{{content}}", - {"token": ""}, - "\n<|Bot|>:" - ]), - format_assistant=StringFormatter(container=[ - "{{content}}" - ]), - separator=[ - {"token": ""}, - "\n" - ], - stop_words=[ - "" - ], - efficient_eos=True + format_user=StringFormatter(container=["<|User|>:{{content}}", {"token": ""}, "\n<|Bot|>:"]), + format_assistant=StringFormatter(container=["{{content}}"]), + separator=[{"token": ""}, "\n"], + stop_words=[""], + efficient_eos=True, ) register_template( name="intern2", - format_user=StringFormatter(container=[ - {"token": "[UNUSED_TOKEN_146]"}, - "user\n{{content}}", - {"token": "[UNUSED_TOKEN_145]"}, - "\n", - {"token": "[UNUSED_TOKEN_146]"}, - "assistant\n" - ]), - format_assistant=StringFormatter(container=[ - "{{content}}" - ]), - format_system=StringFormatter(container=[ - {"token": "[UNUSED_TOKEN_146]"}, - "system\n{{content}}", - {"token": "[UNUSED_TOKEN_145]"}, - "\n" - ]), + format_user=StringFormatter( + container=[ + {"token": "[UNUSED_TOKEN_146]"}, + "user\n{{content}}", + {"token": "[UNUSED_TOKEN_145]"}, + "\n", + {"token": "[UNUSED_TOKEN_146]"}, + "assistant\n", + ] + ), + format_assistant=StringFormatter(container=["{{content}}"]), + format_system=StringFormatter( + container=[{"token": "[UNUSED_TOKEN_146]"}, "system\n{{content}}", {"token": "[UNUSED_TOKEN_145]"}, "\n"] + ), system=( "You are an AI assistant whose name is InternLM (书生·浦语).\n" "- InternLM (书生·浦语) is a conversational language model that is developed " @@ -532,14 +411,9 @@ register_template( "- InternLM (书生·浦语) can understand and communicate fluently in the language chosen " "by the user such as English and 中文." ), - separator=[ - {"token": "[UNUSED_TOKEN_145]"}, - "\n" - ], - stop_words=[ - "[UNUSED_TOKEN_145]" - ], - efficient_eos=True + separator=[{"token": "[UNUSED_TOKEN_145]"}, "\n"], + stop_words=["[UNUSED_TOKEN_145]"], + efficient_eos=True, ) @@ -556,7 +430,7 @@ register_template( "If a question does not make any sense, or is not factually coherent, " "explain why instead of answering something not correct. " "If you don't know the answer to a question, please don't share false information." - ) + ), ) @@ -564,142 +438,83 @@ register_template( name="llama2_zh", format_user=StringFormatter(container=["[INST] {{content}} [/INST]"]), format_system=StringFormatter(container=["<>\n{{content}}\n<>\n\n"]), - system="You are a helpful assistant. 你是一个乐于助人的助手。" + system="You are a helpful assistant. 
你是一个乐于助人的助手。", ) -register_template( - name="mistral", - format_user=StringFormatter(container=["[INST] {{content}} [/INST]"]) -) +register_template(name="mistral", format_user=StringFormatter(container=["[INST] {{content}} [/INST]"])) register_template( name="openchat", - format_user=StringFormatter(container=[ - "GPT4 Correct User: {{content}}", - {"token": "<|end_of_turn|>"}, - "GPT4 Correct Assistant:" - ]), - format_assistant=StringFormatter(container=[ - "{{content}}" - ]), - separator=[ - {"token": "<|end_of_turn|>"} - ], - stop_words=[ - "<|end_of_turn|>" - ], - efficient_eos=True + format_user=StringFormatter( + container=["GPT4 Correct User: {{content}}", {"token": "<|end_of_turn|>"}, "GPT4 Correct Assistant:"] + ), + format_assistant=StringFormatter(container=["{{content}}"]), + separator=[{"token": "<|end_of_turn|>"}], + stop_words=["<|end_of_turn|>"], + efficient_eos=True, ) register_template( name="qwen", - format_user=StringFormatter(container=[ - "<|im_start|>user\n{{content}}<|im_end|>\n<|im_start|>assistant\n" - ]), - format_system=StringFormatter(container=[ - "<|im_start|>system\n{{content}}<|im_end|>\n" - ]), + format_user=StringFormatter(container=["<|im_start|>user\n{{content}}<|im_end|>\n<|im_start|>assistant\n"]), + format_system=StringFormatter(container=["<|im_start|>system\n{{content}}<|im_end|>\n"]), system="You are a helpful assistant.", - separator=[ - "\n" - ], - stop_words=[ - "<|im_end|>" - ], - replace_eos=True + separator=["\n"], + stop_words=["<|im_end|>"], + replace_eos=True, ) -register_template( - name="solar", - format_user=StringFormatter(container=[ - "### User:\n{{content}}\n\n### Assistant:\n" - ]) -) +register_template(name="solar", format_user=StringFormatter(container=["### User:\n{{content}}\n\n### Assistant:\n"])) register_template( name="starchat", - format_user=StringFormatter(container=[ - {"token": "<|user|>"}, - "\n{{content}}", - {"token": "<|end|>"}, - "\n", - {"token": "<|assistant|>"} - ]), - format_assistant=StringFormatter(container=[ - "{{content}}" - ]), - format_system=StringFormatter(container=[ - {"token": "<|system|>"}, - "\n{{content}}", - {"token": "<|end|>"}, - "\n" - ]), - separator=[ - {"token": "<|end|>"}, - "\n" - ], - stop_words=[ - "<|end|>" - ], - efficient_eos=True + format_user=StringFormatter( + container=[{"token": "<|user|>"}, "\n{{content}}", {"token": "<|end|>"}, "\n", {"token": "<|assistant|>"}] + ), + format_assistant=StringFormatter(container=["{{content}}"]), + format_system=StringFormatter(container=[{"token": "<|system|>"}, "\n{{content}}", {"token": "<|end|>"}, "\n"]), + separator=[{"token": "<|end|>"}, "\n"], + stop_words=["<|end|>"], + efficient_eos=True, ) -register_template( - name="vanilla" -) +register_template(name="vanilla") register_template( name="vicuna", - format_user=StringFormatter(container=[ - "USER: {{content}} ASSISTANT:" - ]), + format_user=StringFormatter(container=["USER: {{content}} ASSISTANT:"]), system=( "A chat between a curious user and an artificial intelligence assistant. " "The assistant gives helpful, detailed, and polite answers to the user's questions." 
- ) + ), ) register_template( name="xuanyuan", - format_user=StringFormatter(container=[ - "Human: {{content}} Assistant:" - ]), + format_user=StringFormatter(container=["Human: {{content}} Assistant:"]), system=( "以下是用户和人工智能助手之间的对话。用户以Human开头,人工智能助手以Assistant开头," "会对人类提出的问题给出有帮助、高质量、详细和礼貌的回答,并且总是拒绝参与与不道德、" "不安全、有争议、政治敏感等相关的话题、问题和指示。\n" - ) + ), ) -register_template( - name="xverse", - format_user=StringFormatter(container=[ - "Human: {{content}}\n\nAssistant: " - ]) -) +register_template(name="xverse", format_user=StringFormatter(container=["Human: {{content}}\n\nAssistant: "])) register_template( name="yayi", - format_user=StringFormatter(container=[ - {"token": "<|Human|>"}, - ":\n{{content}}\n\n", - {"token": "<|YaYi|>"}, - ":" - ]), - format_system=StringFormatter(container=[ - {"token": "<|System|>"}, - ":\n{{content}}\n\n" - ]), + format_user=StringFormatter(container=[{"token": "<|Human|>"}, ":\n{{content}}\n\n", {"token": "<|YaYi|>"}, ":"]), + format_system=StringFormatter(container=[{"token": "<|System|>"}, ":\n{{content}}\n\n"]), system=( "You are a helpful, respectful and honest assistant named YaYi " "developed by Beijing Wenge Technology Co.,Ltd. " @@ -711,67 +526,43 @@ register_template( "explain why instead of answering something not correct. " "If you don't know the answer to a question, please don't share false information." ), - separator=[ - "\n\n" - ], - stop_words=[ - "<|End|>" - ] + separator=["\n\n"], + stop_words=["<|End|>"], ) register_template( name="yi", - format_user=StringFormatter(container=[ - "<|im_start|>user\n{{content}}<|im_end|>\n<|im_start|>assistant\n" - ]), - separator=[ - "\n" - ], - stop_words=[ - "<|im_end|>" - ], - replace_eos=True + format_user=StringFormatter(container=["<|im_start|>user\n{{content}}<|im_end|>\n<|im_start|>assistant\n"]), + separator=["\n"], + stop_words=["<|im_end|>"], + replace_eos=True, ) register_template( name="yuan", - format_user=StringFormatter(container=[ - "{{content}}", - {"token": "<sep>"} - ]), - separator=[ - "\n" - ], - stop_words=[ - "<eod>" - ], - replace_eos=True + format_user=StringFormatter(container=["{{content}}", {"token": "<sep>"}]), + separator=["\n"], + stop_words=["<eod>"], + replace_eos=True, ) register_template( name="zephyr", - format_user=StringFormatter(container=[ - "<|user|>\n{{content}}</s><|assistant|>" - ]), - format_system=StringFormatter(container=[ - "<|system|>\n{{content}}</s>", - ]), - system="You are a friendly chatbot who always responds in the style of a pirate" + format_user=StringFormatter(container=["<|user|>\n{{content}}</s><|assistant|>"]), + format_system=StringFormatter( + container=[ + "<|system|>\n{{content}}</s>", + ] + ), + system="You are a friendly chatbot who always responds in the style of a pirate", ) register_template( name="ziya", - format_user=StringFormatter(container=[ - {"token": "<human>"}, - ":{{content}}\n", - {"token": "<bot>"}, - ":" - ]), - separator=[ - "\n" - ] + format_user=StringFormatter(container=[{"token": "<human>"}, ":{{content}}\n", {"token": "<bot>"}, ":"]), + separator=["\n"], ) diff --git a/src/llmtuner/data/utils.py b/src/llmtuner/data/utils.py index 106e87a7..53b8054e 100644 --- a/src/llmtuner/data/utils.py +++ b/src/llmtuner/data/utils.py @@ -4,9 +4,11 @@ from typing import TYPE_CHECKING, Dict, List, Optional, Tuple, Union from ..extras.logging import get_logger + if TYPE_CHECKING: from datasets import Dataset, IterableDataset from transformers import TrainingArguments + from llmtuner.hparams import DataArguments @@ -44,12 +46,10 @@ def infer_max_len(source_len: int, target_len: int, data_args: 
"DataArguments") def split_dataset( - dataset: Union["Dataset", "IterableDataset"], - data_args: "DataArguments", - training_args: "TrainingArguments" + dataset: Union["Dataset", "IterableDataset"], data_args: "DataArguments", training_args: "TrainingArguments" ) -> Dict[str, "Dataset"]: if training_args.do_train: - if data_args.val_size > 1e-6: # Split the dataset + if data_args.val_size > 1e-6: # Split the dataset if data_args.streaming: val_set = dataset.take(int(data_args.val_size)) train_set = dataset.skip(int(data_args.val_size)) @@ -63,5 +63,5 @@ def split_dataset( if data_args.streaming: dataset = dataset.shuffle(buffer_size=data_args.buffer_size, seed=training_args.seed) return {"train_dataset": dataset} - else: # do_eval or do_predict + else: # do_eval or do_predict return {"eval_dataset": dataset} diff --git a/src/llmtuner/eval/evaluator.py b/src/llmtuner/eval/evaluator.py index 08bdb08c..d0c949ee 100644 --- a/src/llmtuner/eval/evaluator.py +++ b/src/llmtuner/eval/evaluator.py @@ -1,35 +1,34 @@ # Inspired by: https://github.com/hendrycks/test/blob/master/evaluate_flan.py -import os -import json -import torch -import numpy as np import inspect -from tqdm import tqdm, trange +import json +import os from typing import Any, Dict, List, Optional +import numpy as np +import torch from datasets import load_dataset +from tqdm import tqdm, trange from transformers.utils import cached_file from ..data import get_template_and_fix_tokenizer -from .template import get_eval_template from ..extras.constants import CHOICES, SUBJECTS from ..hparams import get_eval_args from ..model import dispatch_model, load_model_and_tokenizer +from .template import get_eval_template class Evaluator: - def __init__(self, args: Optional[Dict[str, Any]] = None) -> None: self.model_args, self.data_args, self.eval_args, finetuning_args = get_eval_args(args) self.model, self.tokenizer = load_model_and_tokenizer(self.model_args, finetuning_args) - self.tokenizer.padding_side = "right" # avoid overflow issue in batched inference for llama2 + self.tokenizer.padding_side = "right" # avoid overflow issue in batched inference for llama2 self.model = dispatch_model(self.model) self.template = get_template_and_fix_tokenizer(self.data_args.template, self.tokenizer) self.eval_template = get_eval_template(self.eval_args.lang) - self.choice_inputs = [self.tokenizer.encode( - self.eval_template.prefix + ch, add_special_tokens=False - )[-1] for ch in CHOICES] + self.choice_inputs = [ + self.tokenizer.encode(self.eval_template.prefix + ch, add_special_tokens=False)[-1] for ch in CHOICES + ] @torch.inference_mode() def batch_inference(self, batch_input: Dict[str, torch.Tensor]) -> List[str]: @@ -41,10 +40,10 @@ class Evaluator: def eval(self) -> None: mapping = cached_file( - path_or_repo_id = os.path.join(self.eval_args.task_dir, self.eval_args.task), + path_or_repo_id=os.path.join(self.eval_args.task_dir, self.eval_args.task), filename="mapping.json", cache_dir=self.model_args.cache_dir, - token=self.model_args.hf_hub_token + token=self.model_args.hf_hub_token, ) with open(mapping, "r", encoding="utf-8") as f: @@ -54,7 +53,7 @@ class Evaluator: pbar = tqdm(categorys.keys(), desc="Processing subjects", position=0) results = {} for subject in pbar: - if "trust_remote_code" in inspect.signature(load_dataset).parameters: # for datasets==2.16.0 + if "trust_remote_code" in inspect.signature(load_dataset).parameters: # for datasets==2.16.0 kwargs = {"trust_remote_code": True} else: kwargs = {} @@ -65,32 +64,34 @@ class Evaluator: 
cache_dir=self.model_args.cache_dir, download_mode=self.eval_args.download_mode, token=self.model_args.hf_hub_token, - **kwargs + **kwargs, ) pbar.set_postfix_str(categorys[subject]["name"]) inputs, outputs, labels = [], [], [] for i in trange(len(dataset[self.data_args.split]), desc="Formatting batches", position=1, leave=False): - support_set = dataset["train"].shuffle().select(range(min(self.eval_args.n_shot, len(dataset["train"])))) + support_set = ( + dataset["train"].shuffle().select(range(min(self.eval_args.n_shot, len(dataset["train"])))) + ) messages = self.eval_template.format_example( target_data=dataset[self.data_args.split][i], support_set=support_set, - subject_name=categorys[subject]["name"] + subject_name=categorys[subject]["name"], ) - input_ids, _ = self.template.encode_oneturn( - tokenizer=self.tokenizer, messages=messages - ) + input_ids, _ = self.template.encode_oneturn(tokenizer=self.tokenizer, messages=messages) inputs.append({"input_ids": input_ids, "attention_mask": [1] * len(input_ids)}) labels.append(messages[-1]["content"]) - for i in trange(0, len(inputs), self.eval_args.batch_size, desc="Predicting batches", position=1, leave=False): + for i in trange( + 0, len(inputs), self.eval_args.batch_size, desc="Predicting batches", position=1, leave=False + ): batch_input = self.tokenizer.pad( inputs[i : i + self.eval_args.batch_size], return_attention_mask=True, return_tensors="pt" ).to(self.model.device) preds = self.batch_inference(batch_input) outputs += preds - corrects = (np.array(outputs) == np.array(labels)) + corrects = np.array(outputs) == np.array(labels) category_name = categorys[subject]["category"] category_corrects[category_name] = np.concatenate([category_corrects[category_name], corrects], axis=0) category_corrects["Average"] = np.concatenate([category_corrects["Average"], corrects], axis=0) @@ -100,10 +101,13 @@ class Evaluator: self._save_results(category_corrects, results) def _save_results(self, category_corrects: Dict[str, np.ndarray], results: Dict[str, Dict[int, str]]) -> None: - score_info = "\n".join([ - "{:>15}: {:.2f}".format(category_name, 100 * np.mean(category_correct)) - for category_name, category_correct in category_corrects.items() if len(category_correct) - ]) + score_info = "\n".join( + [ + "{:>15}: {:.2f}".format(category_name, 100 * np.mean(category_correct)) + for category_name, category_correct in category_corrects.items() + if len(category_correct) + ] + ) print(score_info) if self.eval_args.save_dir is not None: os.makedirs(self.eval_args.save_dir, exist_ok=False) diff --git a/src/llmtuner/eval/template.py b/src/llmtuner/eval/template.py index 2c697510..b17f7084 100644 --- a/src/llmtuner/eval/template.py +++ b/src/llmtuner/eval/template.py @@ -1,8 +1,9 @@ from dataclasses import dataclass from typing import TYPE_CHECKING, Dict, List, Tuple -from ..extras.constants import CHOICES from ..data import Role +from ..extras.constants import CHOICES + if TYPE_CHECKING: from datasets import Dataset @@ -10,24 +11,17 @@ if TYPE_CHECKING: @dataclass class EvalTemplate: - system: str choice: str answer: str prefix: str - def parse_example( - self, - example: Dict[str, str] - ) -> Tuple[str, str]: + def parse_example(self, example: Dict[str, str]) -> Tuple[str, str]: candidates = [self.choice.format(choice=ch, content=example[ch]) for ch in CHOICES if ch in example] return "".join([example["question"]] + candidates + [self.answer]), example["answer"] def format_example( - self, - target_data: Dict[str, str], - support_set: "Dataset", - 
subject_name: str + self, target_data: Dict[str, str], support_set: "Dataset", subject_name: str ) -> List[Dict[str, str]]: messages = [] for k in range(len(support_set)): @@ -45,19 +39,8 @@ class EvalTemplate: eval_templates: Dict[str, "EvalTemplate"] = {} -def register_eval_template( - name: str, - system: str, - choice: str, - answer: str, - prefix: str -) -> None: - eval_templates[name] = EvalTemplate( - system=system, - choice=choice, - answer=answer, - prefix=prefix - ) +def register_eval_template(name: str, system: str, choice: str, answer: str, prefix: str) -> None: + eval_templates[name] = EvalTemplate(system=system, choice=choice, answer=answer, prefix=prefix) def get_eval_template(name: str) -> "EvalTemplate": @@ -71,7 +54,7 @@ register_eval_template( system="The following are multiple choice questions (with answers) about {subject}.\n\n", choice="\n{choice}. {content}", answer="\nAnswer: ", - prefix=" " + prefix=" ", ) @@ -80,5 +63,5 @@ register_eval_template( system="以下是中国关于{subject}考试的单项选择题,请选出其中的正确答案。\n\n", choice="\n{choice}. {content}", answer="\n答案:", - prefix="\n" + prefix="\n", ) diff --git a/src/llmtuner/extras/callbacks.py b/src/llmtuner/extras/callbacks.py index b97d0168..086dea6d 100644 --- a/src/llmtuner/extras/callbacks.py +++ b/src/llmtuner/extras/callbacks.py @@ -1,10 +1,11 @@ -import os import json +import os import time -from typing import TYPE_CHECKING from datetime import timedelta +from typing import TYPE_CHECKING + from transformers import TrainerCallback -from transformers.trainer_utils import has_length, PREFIX_CHECKPOINT_DIR +from transformers.trainer_utils import PREFIX_CHECKPOINT_DIR, has_length from .constants import LOG_FILE_NAME from .logging import get_logger @@ -12,14 +13,13 @@ from .misc import fix_valuehead_checkpoint if TYPE_CHECKING: - from transformers import TrainingArguments, TrainerState, TrainerControl + from transformers import TrainerControl, TrainerState, TrainingArguments logger = get_logger(__name__) class FixValueHeadModelCallback(TrainerCallback): - def on_save(self, args: "TrainingArguments", state: "TrainerState", control: "TrainerControl", **kwargs): r""" Event called after a checkpoint save. @@ -28,12 +28,11 @@ class FixValueHeadModelCallback(TrainerCallback): fix_valuehead_checkpoint( model=kwargs.pop("model"), output_dir=os.path.join(args.output_dir, "{}-{}".format(PREFIX_CHECKPOINT_DIR, state.global_step)), - safe_serialization=args.save_safetensors + safe_serialization=args.save_safetensors, ) class LogCallback(TrainerCallback): - def __init__(self, runner=None): self.runner = runner self.in_training = False @@ -99,7 +98,9 @@ class LogCallback(TrainerCallback): self.cur_steps = 0 self.max_steps = 0 - def on_predict(self, args: "TrainingArguments", state: "TrainerState", control: "TrainerControl", *other, **kwargs): + def on_predict( + self, args: "TrainingArguments", state: "TrainerState", control: "TrainerControl", *other, **kwargs + ): r""" Event called after a successful prediction. 
""" @@ -125,18 +126,22 @@ class LogCallback(TrainerCallback): epoch=state.log_history[-1].get("epoch", None), percentage=round(self.cur_steps / self.max_steps * 100, 2) if self.max_steps != 0 else 100, elapsed_time=self.elapsed_time, - remaining_time=self.remaining_time + remaining_time=self.remaining_time, ) if self.runner is not None: - logger.info("{{'loss': {:.4f}, 'learning_rate': {:2.4e}, 'epoch': {:.2f}}}".format( - logs["loss"] or 0, logs["learning_rate"] or 0, logs["epoch"] or 0 - )) + logger.info( + "{{'loss': {:.4f}, 'learning_rate': {:2.4e}, 'epoch': {:.2f}}}".format( + logs["loss"] or 0, logs["learning_rate"] or 0, logs["epoch"] or 0 + ) + ) os.makedirs(args.output_dir, exist_ok=True) with open(os.path.join(args.output_dir, "trainer_log.jsonl"), "a", encoding="utf-8") as f: f.write(json.dumps(logs) + "\n") - def on_prediction_step(self, args: "TrainingArguments", state: "TrainerState", control: "TrainerControl", **kwargs): + def on_prediction_step( + self, args: "TrainingArguments", state: "TrainerState", control: "TrainerControl", **kwargs + ): r""" Event called after a prediction step. """ diff --git a/src/llmtuner/extras/constants.py b/src/llmtuner/extras/constants.py index 2c9fac93..0b4f5a07 100644 --- a/src/llmtuner/extras/constants.py +++ b/src/llmtuner/extras/constants.py @@ -1,5 +1,5 @@ +from collections import OrderedDict, defaultdict from enum import Enum -from collections import defaultdict, OrderedDict from typing import Dict, Optional @@ -11,14 +11,7 @@ DEFAULT_MODULE = defaultdict(str) DEFAULT_TEMPLATE = defaultdict(str) -FILEEXT2TYPE = { - "arrow": "arrow", - "csv": "csv", - "json": "json", - "jsonl": "json", - "parquet": "parquet", - "txt": "text" -} +FILEEXT2TYPE = {"arrow": "arrow", "csv": "csv", "json": "json", "jsonl": "json", "parquet": "parquet", "txt": "text"} IGNORE_INDEX = -100 @@ -39,22 +32,21 @@ TRAINING_STAGES = { "Reward Modeling": "rm", "PPO": "ppo", "DPO": "dpo", - "Pre-Training": "pt" + "Pre-Training": "pt", } V_HEAD_WEIGHTS_NAME = "value_head.bin" V_HEAD_SAFE_WEIGHTS_NAME = "value_head.safetensors" + class DownloadSource(str, Enum): DEFAULT = "hf" MODELSCOPE = "ms" def register_model_group( - models: Dict[str, Dict[DownloadSource, str]], - module: Optional[str] = None, - template: Optional[str] = None + models: Dict[str, Dict[DownloadSource, str]], module: Optional[str] = None, template: Optional[str] = None ) -> None: prefix = None for name, path in models.items(): @@ -73,19 +65,19 @@ register_model_group( models={ "Baichuan-7B-Base": { DownloadSource.DEFAULT: "baichuan-inc/Baichuan-7B", - DownloadSource.MODELSCOPE: "baichuan-inc/baichuan-7B" + DownloadSource.MODELSCOPE: "baichuan-inc/baichuan-7B", }, "Baichuan-13B-Base": { DownloadSource.DEFAULT: "baichuan-inc/Baichuan-13B-Base", - DownloadSource.MODELSCOPE: "baichuan-inc/Baichuan-13B-Base" + DownloadSource.MODELSCOPE: "baichuan-inc/Baichuan-13B-Base", }, "Baichuan-13B-Chat": { DownloadSource.DEFAULT: "baichuan-inc/Baichuan-13B-Chat", - DownloadSource.MODELSCOPE: "baichuan-inc/Baichuan-13B-Chat" - } + DownloadSource.MODELSCOPE: "baichuan-inc/Baichuan-13B-Chat", + }, }, module="W_pack", - template="baichuan" + template="baichuan", ) @@ -93,23 +85,23 @@ register_model_group( models={ "Baichuan2-7B-Base": { DownloadSource.DEFAULT: "baichuan-inc/Baichuan2-7B-Base", - DownloadSource.MODELSCOPE: "baichuan-inc/Baichuan2-7B-Base" + DownloadSource.MODELSCOPE: "baichuan-inc/Baichuan2-7B-Base", }, "Baichuan2-13B-Base": { DownloadSource.DEFAULT: "baichuan-inc/Baichuan2-13B-Base", - 
DownloadSource.MODELSCOPE: "baichuan-inc/Baichuan2-13B-Base" + DownloadSource.MODELSCOPE: "baichuan-inc/Baichuan2-13B-Base", }, "Baichuan2-7B-Chat": { DownloadSource.DEFAULT: "baichuan-inc/Baichuan2-7B-Chat", - DownloadSource.MODELSCOPE: "baichuan-inc/Baichuan2-7B-Chat" + DownloadSource.MODELSCOPE: "baichuan-inc/Baichuan2-7B-Chat", }, "Baichuan2-13B-Chat": { DownloadSource.DEFAULT: "baichuan-inc/Baichuan2-13B-Chat", - DownloadSource.MODELSCOPE: "baichuan-inc/Baichuan2-13B-Chat" - } + DownloadSource.MODELSCOPE: "baichuan-inc/Baichuan2-13B-Chat", + }, }, module="W_pack", - template="baichuan2" + template="baichuan2", ) @@ -117,18 +109,18 @@ register_model_group( models={ "BLOOM-560M": { DownloadSource.DEFAULT: "bigscience/bloom-560m", - DownloadSource.MODELSCOPE: "AI-ModelScope/bloom-560m" + DownloadSource.MODELSCOPE: "AI-ModelScope/bloom-560m", }, "BLOOM-3B": { DownloadSource.DEFAULT: "bigscience/bloom-3b", - DownloadSource.MODELSCOPE: "AI-ModelScope/bloom-3b" + DownloadSource.MODELSCOPE: "AI-ModelScope/bloom-3b", }, "BLOOM-7B1": { DownloadSource.DEFAULT: "bigscience/bloom-7b1", - DownloadSource.MODELSCOPE: "AI-ModelScope/bloom-7b1" - } + DownloadSource.MODELSCOPE: "AI-ModelScope/bloom-7b1", + }, }, - module="query_key_value" + module="query_key_value", ) @@ -136,18 +128,18 @@ register_model_group( models={ "BLOOMZ-560M": { DownloadSource.DEFAULT: "bigscience/bloomz-560m", - DownloadSource.MODELSCOPE: "AI-ModelScope/bloomz-560m" + DownloadSource.MODELSCOPE: "AI-ModelScope/bloomz-560m", }, "BLOOMZ-3B": { DownloadSource.DEFAULT: "bigscience/bloomz-3b", - DownloadSource.MODELSCOPE: "AI-ModelScope/bloomz-3b" + DownloadSource.MODELSCOPE: "AI-ModelScope/bloomz-3b", }, "BLOOMZ-7B1-mt": { DownloadSource.DEFAULT: "bigscience/bloomz-7b1-mt", - DownloadSource.MODELSCOPE: "AI-ModelScope/bloomz-7b1-mt" - } + DownloadSource.MODELSCOPE: "AI-ModelScope/bloomz-7b1-mt", + }, }, - module="query_key_value" + module="query_key_value", ) @@ -155,14 +147,14 @@ register_model_group( models={ "BlueLM-7B-Base": { DownloadSource.DEFAULT: "vivo-ai/BlueLM-7B-Base", - DownloadSource.MODELSCOPE: "vivo-ai/BlueLM-7B-Base" + DownloadSource.MODELSCOPE: "vivo-ai/BlueLM-7B-Base", }, "BlueLM-7B-Chat": { DownloadSource.DEFAULT: "vivo-ai/BlueLM-7B-Chat", - DownloadSource.MODELSCOPE: "vivo-ai/BlueLM-7B-Chat" - } + DownloadSource.MODELSCOPE: "vivo-ai/BlueLM-7B-Chat", + }, }, - template="bluelm" + template="bluelm", ) @@ -170,11 +162,11 @@ register_model_group( models={ "ChatGLM2-6B-Chat": { DownloadSource.DEFAULT: "THUDM/chatglm2-6b", - DownloadSource.MODELSCOPE: "ZhipuAI/chatglm2-6b" + DownloadSource.MODELSCOPE: "ZhipuAI/chatglm2-6b", } }, module="query_key_value", - template="chatglm2" + template="chatglm2", ) @@ -182,15 +174,15 @@ register_model_group( models={ "ChatGLM3-6B-Base": { DownloadSource.DEFAULT: "THUDM/chatglm3-6b-base", - DownloadSource.MODELSCOPE: "ZhipuAI/chatglm3-6b-base" + DownloadSource.MODELSCOPE: "ZhipuAI/chatglm3-6b-base", }, "ChatGLM3-6B-Chat": { DownloadSource.DEFAULT: "THUDM/chatglm3-6b", - DownloadSource.MODELSCOPE: "ZhipuAI/chatglm3-6b" - } + DownloadSource.MODELSCOPE: "ZhipuAI/chatglm3-6b", + }, }, module="query_key_value", - template="chatglm3" + template="chatglm3", ) @@ -198,30 +190,30 @@ register_model_group( models={ "ChineseLLaMA2-1.3B": { DownloadSource.DEFAULT: "hfl/chinese-llama-2-1.3b", - DownloadSource.MODELSCOPE: "AI-ModelScope/chinese-llama-2-1.3b" + DownloadSource.MODELSCOPE: "AI-ModelScope/chinese-llama-2-1.3b", }, "ChineseLLaMA2-7B": { DownloadSource.DEFAULT: "hfl/chinese-llama-2-7b", - 
DownloadSource.MODELSCOPE: "AI-ModelScope/chinese-llama-2-7b" + DownloadSource.MODELSCOPE: "AI-ModelScope/chinese-llama-2-7b", }, "ChineseLLaMA2-13B": { DownloadSource.DEFAULT: "hfl/chinese-llama-2-13b", - DownloadSource.MODELSCOPE: "AI-ModelScope/chinese-llama-2-13b" + DownloadSource.MODELSCOPE: "AI-ModelScope/chinese-llama-2-13b", }, "ChineseLLaMA2-1.3B-Chat": { DownloadSource.DEFAULT: "hfl/chinese-alpaca-2-1.3b", - DownloadSource.MODELSCOPE: "AI-ModelScope/chinese-alpaca-2-1.3b" + DownloadSource.MODELSCOPE: "AI-ModelScope/chinese-alpaca-2-1.3b", }, "ChineseLLaMA2-7B-Chat": { DownloadSource.DEFAULT: "hfl/chinese-alpaca-2-7b", - DownloadSource.MODELSCOPE: "AI-ModelScope/chinese-alpaca-2-7b" + DownloadSource.MODELSCOPE: "AI-ModelScope/chinese-alpaca-2-7b", }, "ChineseLLaMA2-13B-Chat": { DownloadSource.DEFAULT: "hfl/chinese-alpaca-2-13b", - DownloadSource.MODELSCOPE: "AI-ModelScope/chinese-alpaca-2-13b" - } + DownloadSource.MODELSCOPE: "AI-ModelScope/chinese-alpaca-2-13b", + }, }, - template="llama2_zh" + template="llama2_zh", ) @@ -229,22 +221,22 @@ register_model_group( models={ "DeepSeekLLM-7B-Base": { DownloadSource.DEFAULT: "deepseek-ai/deepseek-llm-7b-base", - DownloadSource.MODELSCOPE: "deepseek-ai/deepseek-llm-7b-base" + DownloadSource.MODELSCOPE: "deepseek-ai/deepseek-llm-7b-base", }, "DeepSeekLLM-67B-Base": { DownloadSource.DEFAULT: "deepseek-ai/deepseek-llm-67b-base", - DownloadSource.MODELSCOPE: "deepseek-ai/deepseek-llm-67b-base" + DownloadSource.MODELSCOPE: "deepseek-ai/deepseek-llm-67b-base", }, "DeepSeekLLM-7B-Chat": { DownloadSource.DEFAULT: "deepseek-ai/deepseek-llm-7b-chat", - DownloadSource.MODELSCOPE: "deepseek-ai/deepseek-llm-7b-chat" + DownloadSource.MODELSCOPE: "deepseek-ai/deepseek-llm-7b-chat", }, "DeepSeekLLM-67B-Chat": { DownloadSource.DEFAULT: "deepseek-ai/deepseek-llm-67b-chat", - DownloadSource.MODELSCOPE: "deepseek-ai/deepseek-llm-67b-chat" - } + DownloadSource.MODELSCOPE: "deepseek-ai/deepseek-llm-67b-chat", + }, }, - template="deepseek" + template="deepseek", ) @@ -252,22 +244,22 @@ register_model_group( models={ "DeepSeekCoder-6.7B-Base": { DownloadSource.DEFAULT: "deepseek-ai/deepseek-coder-6.7b-base", - DownloadSource.MODELSCOPE: "deepseek-ai/deepseek-coder-6.7b-base" + DownloadSource.MODELSCOPE: "deepseek-ai/deepseek-coder-6.7b-base", }, "DeepSeekCoder-33B-Base": { DownloadSource.DEFAULT: "deepseek-ai/deepseek-coder-33b-base", - DownloadSource.MODELSCOPE: "deepseek-ai/deepseek-coder-33b-base" + DownloadSource.MODELSCOPE: "deepseek-ai/deepseek-coder-33b-base", }, "DeepSeekCoder-6.7B-Chat": { DownloadSource.DEFAULT: "deepseek-ai/deepseek-coder-6.7b-instruct", - DownloadSource.MODELSCOPE: "deepseek-ai/deepseek-coder-6.7b-instruct" + DownloadSource.MODELSCOPE: "deepseek-ai/deepseek-coder-6.7b-instruct", }, "DeepSeekCoder-33B-Chat": { DownloadSource.DEFAULT: "deepseek-ai/deepseek-coder-33b-instruct", - DownloadSource.MODELSCOPE: "deepseek-ai/deepseek-coder-33b-instruct" - } + DownloadSource.MODELSCOPE: "deepseek-ai/deepseek-coder-33b-instruct", + }, }, - template="deepseekcoder" + template="deepseekcoder", ) @@ -275,14 +267,14 @@ register_model_group( models={ "DeepSeekMoE-16B-Base": { DownloadSource.DEFAULT: "deepseek-ai/deepseek-moe-16b-base", - DownloadSource.MODELSCOPE: "deepseek-ai/deepseek-moe-16b-base" + DownloadSource.MODELSCOPE: "deepseek-ai/deepseek-moe-16b-base", }, "DeepSeekMoE-16B-Chat": { DownloadSource.DEFAULT: "deepseek-ai/deepseek-moe-16b-chat", - DownloadSource.MODELSCOPE: "deepseek-ai/deepseek-moe-16b-chat" - } + DownloadSource.MODELSCOPE: 
"deepseek-ai/deepseek-moe-16b-chat", + }, }, - template="deepseek" + template="deepseek", ) @@ -290,31 +282,31 @@ register_model_group( models={ "Falcon-7B": { DownloadSource.DEFAULT: "tiiuae/falcon-7b", - DownloadSource.MODELSCOPE: "AI-ModelScope/falcon-7b" + DownloadSource.MODELSCOPE: "AI-ModelScope/falcon-7b", }, "Falcon-40B": { DownloadSource.DEFAULT: "tiiuae/falcon-40b", - DownloadSource.MODELSCOPE: "AI-ModelScope/falcon-40b" + DownloadSource.MODELSCOPE: "AI-ModelScope/falcon-40b", }, "Falcon-180B": { DownloadSource.DEFAULT: "tiiuae/falcon-180b", - DownloadSource.MODELSCOPE: "modelscope/falcon-180B" + DownloadSource.MODELSCOPE: "modelscope/falcon-180B", }, "Falcon-7B-Chat": { DownloadSource.DEFAULT: "tiiuae/falcon-7b-instruct", - DownloadSource.MODELSCOPE: "AI-ModelScope/falcon-7b-instruct" + DownloadSource.MODELSCOPE: "AI-ModelScope/falcon-7b-instruct", }, "Falcon-40B-Chat": { DownloadSource.DEFAULT: "tiiuae/falcon-40b-instruct", - DownloadSource.MODELSCOPE: "AI-ModelScope/falcon-40b-instruct" + DownloadSource.MODELSCOPE: "AI-ModelScope/falcon-40b-instruct", }, "Falcon-180B-Chat": { DownloadSource.DEFAULT: "tiiuae/falcon-180b-chat", - DownloadSource.MODELSCOPE: "modelscope/falcon-180B-chat" - } + DownloadSource.MODELSCOPE: "modelscope/falcon-180B-chat", + }, }, module="query_key_value", - template="falcon" + template="falcon", ) @@ -322,22 +314,22 @@ register_model_group( models={ "InternLM-7B": { DownloadSource.DEFAULT: "internlm/internlm-7b", - DownloadSource.MODELSCOPE: "Shanghai_AI_Laboratory/internlm-7b" + DownloadSource.MODELSCOPE: "Shanghai_AI_Laboratory/internlm-7b", }, "InternLM-20B": { DownloadSource.DEFAULT: "internlm/internlm-20b", - DownloadSource.MODELSCOPE: "Shanghai_AI_Laboratory/internlm-20b" + DownloadSource.MODELSCOPE: "Shanghai_AI_Laboratory/internlm-20b", }, "InternLM-7B-Chat": { DownloadSource.DEFAULT: "internlm/internlm-chat-7b", - DownloadSource.MODELSCOPE: "Shanghai_AI_Laboratory/internlm-chat-7b" + DownloadSource.MODELSCOPE: "Shanghai_AI_Laboratory/internlm-chat-7b", }, "InternLM-20B-Chat": { DownloadSource.DEFAULT: "internlm/internlm-chat-20b", - DownloadSource.MODELSCOPE: "Shanghai_AI_Laboratory/internlm-chat-20b" - } + DownloadSource.MODELSCOPE: "Shanghai_AI_Laboratory/internlm-chat-20b", + }, }, - template="intern" + template="intern", ) @@ -345,23 +337,23 @@ register_model_group( models={ "InternLM2-7B": { DownloadSource.DEFAULT: "internlm/internlm2-7b", - DownloadSource.MODELSCOPE: "Shanghai_AI_Laboratory/internlm2-7b" + DownloadSource.MODELSCOPE: "Shanghai_AI_Laboratory/internlm2-7b", }, "InternLM2-20B": { DownloadSource.DEFAULT: "internlm/internlm2-20b", - DownloadSource.MODELSCOPE: "Shanghai_AI_Laboratory/internlm2-20b" + DownloadSource.MODELSCOPE: "Shanghai_AI_Laboratory/internlm2-20b", }, "InternLM2-7B-Chat": { DownloadSource.DEFAULT: "internlm/internlm2-chat-7b", - DownloadSource.MODELSCOPE: "Shanghai_AI_Laboratory/internlm2-chat-7b" + DownloadSource.MODELSCOPE: "Shanghai_AI_Laboratory/internlm2-chat-7b", }, "InternLM2-20B-Chat": { DownloadSource.DEFAULT: "internlm/internlm2-chat-20b", - DownloadSource.MODELSCOPE: "Shanghai_AI_Laboratory/internlm2-chat-20b" - } + DownloadSource.MODELSCOPE: "Shanghai_AI_Laboratory/internlm2-chat-20b", + }, }, module="wqkv", - template="intern2" + template="intern2", ) @@ -369,31 +361,28 @@ register_model_group( models={ "LingoWhale-8B": { DownloadSource.DEFAULT: "deeplang-ai/LingoWhale-8B", - DownloadSource.MODELSCOPE: "DeepLang/LingoWhale-8B" + DownloadSource.MODELSCOPE: "DeepLang/LingoWhale-8B", } }, - 
module="qkv_proj" + module="qkv_proj", ) register_model_group( models={ - "LLaMA-7B": { - DownloadSource.DEFAULT: "huggyllama/llama-7b", - DownloadSource.MODELSCOPE: "skyline2006/llama-7b" - }, + "LLaMA-7B": {DownloadSource.DEFAULT: "huggyllama/llama-7b", DownloadSource.MODELSCOPE: "skyline2006/llama-7b"}, "LLaMA-13B": { DownloadSource.DEFAULT: "huggyllama/llama-13b", - DownloadSource.MODELSCOPE: "skyline2006/llama-13b" + DownloadSource.MODELSCOPE: "skyline2006/llama-13b", }, "LLaMA-30B": { DownloadSource.DEFAULT: "huggyllama/llama-30b", - DownloadSource.MODELSCOPE: "skyline2006/llama-30b" + DownloadSource.MODELSCOPE: "skyline2006/llama-30b", }, "LLaMA-65B": { DownloadSource.DEFAULT: "huggyllama/llama-65b", - DownloadSource.MODELSCOPE: "skyline2006/llama-65b" - } + DownloadSource.MODELSCOPE: "skyline2006/llama-65b", + }, } ) @@ -402,30 +391,30 @@ register_model_group( models={ "LLaMA2-7B": { DownloadSource.DEFAULT: "meta-llama/Llama-2-7b-hf", - DownloadSource.MODELSCOPE: "modelscope/Llama-2-7b-ms" + DownloadSource.MODELSCOPE: "modelscope/Llama-2-7b-ms", }, "LLaMA2-13B": { DownloadSource.DEFAULT: "meta-llama/Llama-2-13b-hf", - DownloadSource.MODELSCOPE: "modelscope/Llama-2-13b-ms" + DownloadSource.MODELSCOPE: "modelscope/Llama-2-13b-ms", }, "LLaMA2-70B": { DownloadSource.DEFAULT: "meta-llama/Llama-2-70b-hf", - DownloadSource.MODELSCOPE: "modelscope/Llama-2-70b-ms" + DownloadSource.MODELSCOPE: "modelscope/Llama-2-70b-ms", }, "LLaMA2-7B-Chat": { DownloadSource.DEFAULT: "meta-llama/Llama-2-7b-chat-hf", - DownloadSource.MODELSCOPE: "modelscope/Llama-2-7b-chat-ms" + DownloadSource.MODELSCOPE: "modelscope/Llama-2-7b-chat-ms", }, "LLaMA2-13B-Chat": { DownloadSource.DEFAULT: "meta-llama/Llama-2-13b-chat-hf", - DownloadSource.MODELSCOPE: "modelscope/Llama-2-13b-chat-ms" + DownloadSource.MODELSCOPE: "modelscope/Llama-2-13b-chat-ms", }, "LLaMA2-70B-Chat": { DownloadSource.DEFAULT: "meta-llama/Llama-2-70b-chat-hf", - DownloadSource.MODELSCOPE: "modelscope/Llama-2-70b-chat-ms" - } + DownloadSource.MODELSCOPE: "modelscope/Llama-2-70b-chat-ms", + }, }, - template="llama2" + template="llama2", ) @@ -433,18 +422,18 @@ register_model_group( models={ "Mistral-7B": { DownloadSource.DEFAULT: "mistralai/Mistral-7B-v0.1", - DownloadSource.MODELSCOPE: "AI-ModelScope/Mistral-7B-v0.1" + DownloadSource.MODELSCOPE: "AI-ModelScope/Mistral-7B-v0.1", }, "Mistral-7B-Chat": { DownloadSource.DEFAULT: "mistralai/Mistral-7B-Instruct-v0.1", - DownloadSource.MODELSCOPE: "AI-ModelScope/Mistral-7B-Instruct-v0.1" + DownloadSource.MODELSCOPE: "AI-ModelScope/Mistral-7B-Instruct-v0.1", }, "Mistral-7B-v0.2-Chat": { DownloadSource.DEFAULT: "mistralai/Mistral-7B-Instruct-v0.2", - DownloadSource.MODELSCOPE: "AI-ModelScope/Mistral-7B-Instruct-v0.2" - } + DownloadSource.MODELSCOPE: "AI-ModelScope/Mistral-7B-Instruct-v0.2", + }, }, - template="mistral" + template="mistral", ) @@ -452,14 +441,14 @@ register_model_group( models={ "Mixtral-8x7B": { DownloadSource.DEFAULT: "mistralai/Mixtral-8x7B-v0.1", - DownloadSource.MODELSCOPE: "AI-ModelScope/Mixtral-8x7B-v0.1" + DownloadSource.MODELSCOPE: "AI-ModelScope/Mixtral-8x7B-v0.1", }, "Mixtral-8x7B-Chat": { DownloadSource.DEFAULT: "mistralai/Mixtral-8x7B-Instruct-v0.1", - DownloadSource.MODELSCOPE: "AI-ModelScope/Mixtral-8x7B-Instruct-v0.1" - } + DownloadSource.MODELSCOPE: "AI-ModelScope/Mixtral-8x7B-Instruct-v0.1", + }, }, - template="mistral" + template="mistral", ) @@ -467,110 +456,87 @@ register_model_group( models={ "OpenChat3.5-7B-Chat": { DownloadSource.DEFAULT: "openchat/openchat_3.5", - 
DownloadSource.MODELSCOPE: "myxiongmodel/openchat_3.5" + DownloadSource.MODELSCOPE: "myxiongmodel/openchat_3.5", } }, - template="openchat" + template="openchat", ) register_model_group( models={ - "Phi-1.5-1.3B": { - DownloadSource.DEFAULT: "microsoft/phi-1_5", - DownloadSource.MODELSCOPE: "allspace/PHI_1-5" - }, - "Phi-2-2.7B": { - DownloadSource.DEFAULT: "microsoft/phi-2", - DownloadSource.MODELSCOPE: "AI-ModelScope/phi-2" - } + "Phi-1.5-1.3B": {DownloadSource.DEFAULT: "microsoft/phi-1_5", DownloadSource.MODELSCOPE: "allspace/PHI_1-5"}, + "Phi-2-2.7B": {DownloadSource.DEFAULT: "microsoft/phi-2", DownloadSource.MODELSCOPE: "AI-ModelScope/phi-2"}, } ) register_model_group( models={ - "Qwen-1.8B": { - DownloadSource.DEFAULT: "Qwen/Qwen-1_8B", - DownloadSource.MODELSCOPE: "qwen/Qwen-1_8B" - }, - "Qwen-7B": { - DownloadSource.DEFAULT: "Qwen/Qwen-7B", - DownloadSource.MODELSCOPE: "qwen/Qwen-7B" - }, - "Qwen-14B": { - DownloadSource.DEFAULT: "Qwen/Qwen-14B", - DownloadSource.MODELSCOPE: "qwen/Qwen-14B" - }, - "Qwen-72B": { - DownloadSource.DEFAULT: "Qwen/Qwen-72B", - DownloadSource.MODELSCOPE: "qwen/Qwen-72B" - }, + "Qwen-1.8B": {DownloadSource.DEFAULT: "Qwen/Qwen-1_8B", DownloadSource.MODELSCOPE: "qwen/Qwen-1_8B"}, + "Qwen-7B": {DownloadSource.DEFAULT: "Qwen/Qwen-7B", DownloadSource.MODELSCOPE: "qwen/Qwen-7B"}, + "Qwen-14B": {DownloadSource.DEFAULT: "Qwen/Qwen-14B", DownloadSource.MODELSCOPE: "qwen/Qwen-14B"}, + "Qwen-72B": {DownloadSource.DEFAULT: "Qwen/Qwen-72B", DownloadSource.MODELSCOPE: "qwen/Qwen-72B"}, "Qwen-1.8B-Chat": { DownloadSource.DEFAULT: "Qwen/Qwen-1_8B-Chat", - DownloadSource.MODELSCOPE: "qwen/Qwen-1_8B-Chat" - }, - "Qwen-7B-Chat": { - DownloadSource.DEFAULT: "Qwen/Qwen-7B-Chat", - DownloadSource.MODELSCOPE: "qwen/Qwen-7B-Chat" + DownloadSource.MODELSCOPE: "qwen/Qwen-1_8B-Chat", }, + "Qwen-7B-Chat": {DownloadSource.DEFAULT: "Qwen/Qwen-7B-Chat", DownloadSource.MODELSCOPE: "qwen/Qwen-7B-Chat"}, "Qwen-14B-Chat": { DownloadSource.DEFAULT: "Qwen/Qwen-14B-Chat", - DownloadSource.MODELSCOPE: "qwen/Qwen-14B-Chat" + DownloadSource.MODELSCOPE: "qwen/Qwen-14B-Chat", }, "Qwen-72B-Chat": { DownloadSource.DEFAULT: "Qwen/Qwen-72B-Chat", - DownloadSource.MODELSCOPE: "qwen/Qwen-72B-Chat" + DownloadSource.MODELSCOPE: "qwen/Qwen-72B-Chat", }, "Qwen-1.8B-int8-Chat": { DownloadSource.DEFAULT: "Qwen/Qwen-1_8B-Chat-Int8", - DownloadSource.MODELSCOPE: "qwen/Qwen-1_8B-Chat-Int8" + DownloadSource.MODELSCOPE: "qwen/Qwen-1_8B-Chat-Int8", }, "Qwen-1.8B-int4-Chat": { DownloadSource.DEFAULT: "Qwen/Qwen-1_8B-Chat-Int4", - DownloadSource.MODELSCOPE: "qwen/Qwen-1_8B-Chat-Int4" + DownloadSource.MODELSCOPE: "qwen/Qwen-1_8B-Chat-Int4", }, "Qwen-7B-int8-Chat": { DownloadSource.DEFAULT: "Qwen/Qwen-7B-Chat-Int8", - DownloadSource.MODELSCOPE: "qwen/Qwen-7B-Chat-Int8" + DownloadSource.MODELSCOPE: "qwen/Qwen-7B-Chat-Int8", }, "Qwen-7B-int4-Chat": { DownloadSource.DEFAULT: "Qwen/Qwen-7B-Chat-Int4", - DownloadSource.MODELSCOPE: "qwen/Qwen-7B-Chat-Int4" + DownloadSource.MODELSCOPE: "qwen/Qwen-7B-Chat-Int4", }, "Qwen-14B-int8-Chat": { DownloadSource.DEFAULT: "Qwen/Qwen-14B-Chat-Int8", - DownloadSource.MODELSCOPE: "qwen/Qwen-14B-Chat-Int8" + DownloadSource.MODELSCOPE: "qwen/Qwen-14B-Chat-Int8", }, "Qwen-14B-int4-Chat": { DownloadSource.DEFAULT: "Qwen/Qwen-14B-Chat-Int4", - DownloadSource.MODELSCOPE: "qwen/Qwen-14B-Chat-Int4" + DownloadSource.MODELSCOPE: "qwen/Qwen-14B-Chat-Int4", }, "Qwen-72B-int8-Chat": { DownloadSource.DEFAULT: "Qwen/Qwen-72B-Chat-Int8", - DownloadSource.MODELSCOPE: "qwen/Qwen-72B-Chat-Int8" + 
DownloadSource.MODELSCOPE: "qwen/Qwen-72B-Chat-Int8", }, "Qwen-72B-int4-Chat": { DownloadSource.DEFAULT: "Qwen/Qwen-72B-Chat-Int4", - DownloadSource.MODELSCOPE: "qwen/Qwen-72B-Chat-Int4" - } + DownloadSource.MODELSCOPE: "qwen/Qwen-72B-Chat-Int4", + }, }, module="c_attn", - template="qwen" + template="qwen", ) register_model_group( models={ - "SOLAR-10.7B": { - DownloadSource.DEFAULT: "upstage/SOLAR-10.7B-v1.0" - }, + "SOLAR-10.7B": {DownloadSource.DEFAULT: "upstage/SOLAR-10.7B-v1.0"}, "SOLAR-10.7B-Chat": { DownloadSource.DEFAULT: "upstage/SOLAR-10.7B-Instruct-v1.0", - DownloadSource.MODELSCOPE: "AI-ModelScope/SOLAR-10.7B-Instruct-v1.0" - } + DownloadSource.MODELSCOPE: "AI-ModelScope/SOLAR-10.7B-Instruct-v1.0", + }, }, - template="solar" + template="solar", ) @@ -578,7 +544,7 @@ register_model_group( models={ "Skywork-13B-Base": { DownloadSource.DEFAULT: "Skywork/Skywork-13B-base", - DownloadSource.MODELSCOPE: "skywork/Skywork-13B-base" + DownloadSource.MODELSCOPE: "skywork/Skywork-13B-base", } } ) @@ -588,68 +554,51 @@ register_model_group( models={ "Vicuna1.5-7B-Chat": { DownloadSource.DEFAULT: "lmsys/vicuna-7b-v1.5", - DownloadSource.MODELSCOPE: "Xorbits/vicuna-7b-v1.5" + DownloadSource.MODELSCOPE: "Xorbits/vicuna-7b-v1.5", }, "Vicuna1.5-13B-Chat": { DownloadSource.DEFAULT: "lmsys/vicuna-13b-v1.5", - DownloadSource.MODELSCOPE: "Xorbits/vicuna-13b-v1.5" - } + DownloadSource.MODELSCOPE: "Xorbits/vicuna-13b-v1.5", + }, }, - template="vicuna" + template="vicuna", ) register_model_group( models={ - "XuanYuan-70B": { - DownloadSource.DEFAULT: "Duxiaoman-DI/XuanYuan-70B" - }, - "XuanYuan-70B-Chat": { - DownloadSource.DEFAULT: "Duxiaoman-DI/XuanYuan-70B-Chat" - }, - "XuanYuan-70B-int8-Chat": { - DownloadSource.DEFAULT: "Duxiaoman-DI/XuanYuan-70B-Chat-8bit" - }, - "XuanYuan-70B-int4-Chat": { - DownloadSource.DEFAULT: "Duxiaoman-DI/XuanYuan-70B-Chat-4bit" - } + "XuanYuan-70B": {DownloadSource.DEFAULT: "Duxiaoman-DI/XuanYuan-70B"}, + "XuanYuan-70B-Chat": {DownloadSource.DEFAULT: "Duxiaoman-DI/XuanYuan-70B-Chat"}, + "XuanYuan-70B-int8-Chat": {DownloadSource.DEFAULT: "Duxiaoman-DI/XuanYuan-70B-Chat-8bit"}, + "XuanYuan-70B-int4-Chat": {DownloadSource.DEFAULT: "Duxiaoman-DI/XuanYuan-70B-Chat-4bit"}, }, - template="xuanyuan" + template="xuanyuan", ) register_model_group( models={ - "XVERSE-7B": { - DownloadSource.DEFAULT: "xverse/XVERSE-7B", - DownloadSource.MODELSCOPE: "xverse/XVERSE-7B" - }, - "XVERSE-13B": { - DownloadSource.DEFAULT: "xverse/XVERSE-13B", - DownloadSource.MODELSCOPE: "xverse/XVERSE-13B" - }, - "XVERSE-65B": { - DownloadSource.DEFAULT: "xverse/XVERSE-65B", - DownloadSource.MODELSCOPE: "xverse/XVERSE-65B" - }, + "XVERSE-7B": {DownloadSource.DEFAULT: "xverse/XVERSE-7B", DownloadSource.MODELSCOPE: "xverse/XVERSE-7B"}, + "XVERSE-13B": {DownloadSource.DEFAULT: "xverse/XVERSE-13B", DownloadSource.MODELSCOPE: "xverse/XVERSE-13B"}, + "XVERSE-65B": {DownloadSource.DEFAULT: "xverse/XVERSE-65B", DownloadSource.MODELSCOPE: "xverse/XVERSE-65B"}, "XVERSE-65B-2": { DownloadSource.DEFAULT: "xverse/XVERSE-65B-2", - DownloadSource.MODELSCOPE: "xverse/XVERSE-65B-2" + DownloadSource.MODELSCOPE: "xverse/XVERSE-65B-2", }, "XVERSE-7B-Chat": { DownloadSource.DEFAULT: "xverse/XVERSE-7B-Chat", - DownloadSource.MODELSCOPE: "xverse/XVERSE-7B-Chat" + DownloadSource.MODELSCOPE: "xverse/XVERSE-7B-Chat", }, "XVERSE-13B-Chat": { DownloadSource.DEFAULT: "xverse/XVERSE-13B-Chat", - DownloadSource.MODELSCOPE: "xverse/XVERSE-13B-Chat" + DownloadSource.MODELSCOPE: "xverse/XVERSE-13B-Chat", }, "XVERSE-65B-Chat": { 
DownloadSource.DEFAULT: "xverse/XVERSE-65B-Chat", - DownloadSource.MODELSCOPE: "xverse/XVERSE-65B-Chat" - } + DownloadSource.MODELSCOPE: "xverse/XVERSE-65B-Chat", + }, }, - template="xverse" + template="xverse", ) @@ -657,45 +606,33 @@ register_model_group( models={ "Yayi-7B": { DownloadSource.DEFAULT: "wenge-research/yayi-7b-llama2", - DownloadSource.MODELSCOPE: "AI-ModelScope/yayi-7b-llama2" + DownloadSource.MODELSCOPE: "AI-ModelScope/yayi-7b-llama2", }, "Yayi-13B": { DownloadSource.DEFAULT: "wenge-research/yayi-13b-llama2", - DownloadSource.MODELSCOPE: "AI-ModelScope/yayi-13b-llama2" - } + DownloadSource.MODELSCOPE: "AI-ModelScope/yayi-13b-llama2", + }, }, - template="yayi" + template="yayi", ) register_model_group( models={ - "Yi-6B": { - DownloadSource.DEFAULT: "01-ai/Yi-6B", - DownloadSource.MODELSCOPE: "01ai/Yi-6B" - }, - "Yi-34B": { - DownloadSource.DEFAULT: "01-ai/Yi-34B", - DownloadSource.MODELSCOPE: "01ai/Yi-34B" - }, - "Yi-6B-Chat": { - DownloadSource.DEFAULT: "01-ai/Yi-6B-Chat", - DownloadSource.MODELSCOPE: "01ai/Yi-6B-Chat" - }, - "Yi-34B-Chat": { - DownloadSource.DEFAULT: "01-ai/Yi-34B-Chat", - DownloadSource.MODELSCOPE: "01ai/Yi-34B-Chat" - }, + "Yi-6B": {DownloadSource.DEFAULT: "01-ai/Yi-6B", DownloadSource.MODELSCOPE: "01ai/Yi-6B"}, + "Yi-34B": {DownloadSource.DEFAULT: "01-ai/Yi-34B", DownloadSource.MODELSCOPE: "01ai/Yi-34B"}, + "Yi-6B-Chat": {DownloadSource.DEFAULT: "01-ai/Yi-6B-Chat", DownloadSource.MODELSCOPE: "01ai/Yi-6B-Chat"}, + "Yi-34B-Chat": {DownloadSource.DEFAULT: "01-ai/Yi-34B-Chat", DownloadSource.MODELSCOPE: "01ai/Yi-34B-Chat"}, "Yi-6B-int8-Chat": { DownloadSource.DEFAULT: "01-ai/Yi-6B-Chat-8bits", - DownloadSource.MODELSCOPE: "01ai/Yi-6B-Chat-8bits" + DownloadSource.MODELSCOPE: "01ai/Yi-6B-Chat-8bits", }, "Yi-34B-int8-Chat": { DownloadSource.DEFAULT: "01-ai/Yi-34B-Chat-8bits", - DownloadSource.MODELSCOPE: "01ai/Yi-34B-Chat-8bits" - } + DownloadSource.MODELSCOPE: "01ai/Yi-34B-Chat-8bits", + }, }, - template="yi" + template="yi", ) @@ -703,18 +640,18 @@ register_model_group( models={ "Yuan2-2B-Chat": { DownloadSource.DEFAULT: "IEITYuan/Yuan2-2B-hf", - DownloadSource.MODELSCOPE: "YuanLLM/Yuan2.0-2B-hf" + DownloadSource.MODELSCOPE: "YuanLLM/Yuan2.0-2B-hf", }, "Yuan2-51B-Chat": { DownloadSource.DEFAULT: "IEITYuan/Yuan2-51B-hf", - DownloadSource.MODELSCOPE: "YuanLLM/Yuan2.0-51B-hf" + DownloadSource.MODELSCOPE: "YuanLLM/Yuan2.0-51B-hf", }, "Yuan2-102B-Chat": { DownloadSource.DEFAULT: "IEITYuan/Yuan2-102B-hf", - DownloadSource.MODELSCOPE: "YuanLLM/Yuan2.0-102B-hf" - } + DownloadSource.MODELSCOPE: "YuanLLM/Yuan2.0-102B-hf", + }, }, - template="yuan" + template="yuan", ) @@ -722,12 +659,12 @@ register_model_group( models={ "Zephyr-7B-Alpha-Chat": { DownloadSource.DEFAULT: "HuggingFaceH4/zephyr-7b-alpha", - DownloadSource.MODELSCOPE: "AI-ModelScope/zephyr-7b-alpha" + DownloadSource.MODELSCOPE: "AI-ModelScope/zephyr-7b-alpha", }, "Zephyr-7B-Beta-Chat": { DownloadSource.DEFAULT: "HuggingFaceH4/zephyr-7b-beta", - DownloadSource.MODELSCOPE: "modelscope/zephyr-7b-beta" - } + DownloadSource.MODELSCOPE: "modelscope/zephyr-7b-beta", + }, }, - template="zephyr" + template="zephyr", ) diff --git a/src/llmtuner/extras/logging.py b/src/llmtuner/extras/logging.py index d01c14a4..bb270776 100644 --- a/src/llmtuner/extras/logging.py +++ b/src/llmtuner/extras/logging.py @@ -1,5 +1,5 @@ -import sys import logging +import sys class LoggerHandler(logging.Handler): @@ -27,8 +27,7 @@ def get_logger(name: str) -> logging.Logger: Gets a standard logger with a stream hander to stdout. 
""" formatter = logging.Formatter( - fmt="%(asctime)s - %(levelname)s - %(name)s - %(message)s", - datefmt="%m/%d/%Y %H:%M:%S" + fmt="%(asctime)s - %(levelname)s - %(name)s - %(message)s", datefmt="%m/%d/%Y %H:%M:%S" ) handler = logging.StreamHandler(sys.stdout) handler.setFormatter(formatter) diff --git a/src/llmtuner/extras/misc.py b/src/llmtuner/extras/misc.py index 2a1199e4..e892f39a 100644 --- a/src/llmtuner/extras/misc.py +++ b/src/llmtuner/extras/misc.py @@ -1,31 +1,33 @@ import gc import os -import torch from typing import TYPE_CHECKING, Dict, Tuple + +import torch +from peft import PeftModel from transformers import InfNanRemoveLogitsProcessor, LogitsProcessorList, PreTrainedModel from transformers.utils import ( - WEIGHTS_NAME, SAFE_WEIGHTS_NAME, + WEIGHTS_NAME, is_torch_bf16_gpu_available, is_torch_cuda_available, is_torch_npu_available, - is_torch_xpu_available + is_torch_xpu_available, ) -from peft import PeftModel -from .constants import V_HEAD_WEIGHTS_NAME, V_HEAD_SAFE_WEIGHTS_NAME +from .constants import V_HEAD_SAFE_WEIGHTS_NAME, V_HEAD_WEIGHTS_NAME from .logging import get_logger _is_fp16_available = is_torch_npu_available() or is_torch_cuda_available() try: _is_bf16_available = is_torch_bf16_gpu_available() -except: +except Exception: _is_bf16_available = False if TYPE_CHECKING: from trl import AutoModelForCausalLMWithValueHead + from llmtuner.hparams import ModelArguments @@ -36,6 +38,7 @@ class AverageMeter: r""" Computes and stores the average and current value. """ + def __init__(self): self.reset() @@ -75,9 +78,7 @@ def count_parameters(model: torch.nn.Module) -> Tuple[int, int]: def fix_valuehead_checkpoint( - model: "AutoModelForCausalLMWithValueHead", - output_dir: str, - safe_serialization: bool + model: "AutoModelForCausalLMWithValueHead", output_dir: str, safe_serialization: bool ) -> None: r""" The model is already unwrapped. 
@@ -95,6 +96,7 @@ def fix_valuehead_checkpoint( if safe_serialization: from safetensors import safe_open from safetensors.torch import save_file + path_to_checkpoint = os.path.join(output_dir, SAFE_WEIGHTS_NAME) with safe_open(path_to_checkpoint, framework="pt", device="cpu") as f: state_dict: Dict[str, torch.Tensor] = {key: f.get_tensor(key) for key in f.keys()} @@ -112,9 +114,7 @@ def fix_valuehead_checkpoint( os.remove(path_to_checkpoint) model.pretrained_model.save_pretrained( - output_dir, - state_dict=decoder_state_dict or None, - safe_serialization=safe_serialization + output_dir, state_dict=decoder_state_dict or None, safe_serialization=safe_serialization ) if safe_serialization: @@ -182,11 +182,10 @@ def try_download_model_from_ms(model_args: "ModelArguments") -> None: try: from modelscope import snapshot_download + revision = "master" if model_args.model_revision == "main" else model_args.model_revision model_args.model_name_or_path = snapshot_download( - model_args.model_name_or_path, - revision=revision, - cache_dir=model_args.cache_dir + model_args.model_name_or_path, revision=revision, cache_dir=model_args.cache_dir ) except ImportError: raise ImportError("Please install modelscope via `pip install modelscope -U`") diff --git a/src/llmtuner/extras/packages.py b/src/llmtuner/extras/packages.py index 4fd53346..71893475 100644 --- a/src/llmtuner/extras/packages.py +++ b/src/llmtuner/extras/packages.py @@ -9,7 +9,7 @@ def is_package_available(name: str) -> bool: def get_package_version(name: str) -> str: try: return importlib.metadata.version(name) - except: + except Exception: return "0.0.0" diff --git a/src/llmtuner/extras/patches/llama_patch.py b/src/llmtuner/extras/patches/llama_patch.py index 9e0bcdf5..fa43f769 100644 --- a/src/llmtuner/extras/patches/llama_patch.py +++ b/src/llmtuner/extras/patches/llama_patch.py @@ -1,11 +1,16 @@ import math +from typing import Optional, Tuple + import torch import torch.nn as nn -from typing import Optional, Tuple -from transformers.utils import logging from transformers.models.llama.modeling_llama import ( - Cache, LlamaAttention, LlamaFlashAttention2, apply_rotary_pos_emb, repeat_kv + Cache, + LlamaAttention, + LlamaFlashAttention2, + apply_rotary_pos_emb, + repeat_kv, ) +from transformers.utils import logging logger = logging.get_logger(__name__) @@ -19,7 +24,7 @@ def llama_torch_attn_forward( position_ids: Optional[torch.LongTensor] = None, past_key_value: Optional["Cache"] = None, output_attentions: bool = False, - **kwargs + **kwargs, ) -> Tuple[torch.Tensor, Optional[torch.Tensor], Optional[Tuple[torch.Tensor]]]: bsz, q_len, _ = hidden_states.size() @@ -45,15 +50,17 @@ def llama_torch_attn_forward( key_states = repeat_kv(key_states, self.num_key_value_groups) value_states = repeat_kv(value_states, self.num_key_value_groups) - if getattr(self.config, "group_size_ratio", None) and self.training: # shift + if getattr(self.config, "group_size_ratio", None) and self.training: # shift groupsz = int(q_len * getattr(self.config, "group_size_ratio")) assert q_len % groupsz == 0, "q_len {} should be divisible by group size {}.".format(q_len, groupsz) num_groups = q_len // groupsz + def shift(state: torch.Tensor) -> torch.Tensor: - state = state.transpose(1, 2) # output: (bsz, seq_len, n_heads, head_dim) - state = torch.cat(( - state[:, :, :self.num_heads//2], state[:, :, self.num_heads//2:].roll(-groupsz//2, dims=1) - ), dim=2) + state = state.transpose(1, 2) # output: (bsz, seq_len, n_heads, head_dim) + state = torch.cat( + (state[:, :, : 
self.num_heads // 2], state[:, :, self.num_heads // 2 :].roll(-groupsz // 2, dims=1)), + dim=2, + ) return state.reshape(bsz * num_groups, groupsz, self.num_heads, self.head_dim).transpose(1, 2) query_states, key_states, value_states = shift(query_states), shift(key_states), shift(value_states) @@ -68,14 +75,17 @@ def llama_torch_attn_forward( # upcast attention to fp32 attn_weights = nn.functional.softmax(attn_weights, dim=-1, dtype=torch.float32).to(query_states.dtype) attn_weights = nn.functional.dropout(attn_weights, p=self.attention_dropout, training=self.training) - attn_output = torch.matmul(attn_weights, value_states) # (bsz, :, seq_len, :) or (bsz*n_group, :, groupsz, :) + attn_output = torch.matmul(attn_weights, value_states) # (bsz, :, seq_len, :) or (bsz*n_group, :, groupsz, :) attn_output = attn_output.transpose(1, 2).contiguous() - if getattr(self.config, "group_size_ratio", None) and self.training: # shift back + if getattr(self.config, "group_size_ratio", None) and self.training: # shift back attn_output.reshape(bsz, q_len, self.num_heads, self.head_dim) - attn_output = torch.cat(( - attn_output[:, :, :self.num_heads//2], attn_output[:, :, self.num_heads//2:].roll(groupsz//2, dims=1) - )) + attn_output = torch.cat( + ( + attn_output[:, :, : self.num_heads // 2], + attn_output[:, :, self.num_heads // 2 :].roll(groupsz // 2, dims=1), + ) + ) attn_output = attn_output.reshape(bsz, q_len, self.hidden_size) attn_output = self.o_proj(attn_output) @@ -94,7 +104,7 @@ def llama_flash_attn_forward( position_ids: Optional[torch.LongTensor] = None, past_key_value: Optional[Tuple[torch.Tensor]] = None, output_attentions: bool = False, - **kwargs + **kwargs, ) -> Tuple[torch.Tensor, Optional[torch.Tensor], Optional[Tuple[torch.Tensor]]]: # LlamaFlashAttention2 attention does not support output_attentions output_attentions = False @@ -124,9 +134,9 @@ def llama_flash_attn_forward( key_states = repeat_kv(key_states, self.num_key_value_groups) value_states = repeat_kv(value_states, self.num_key_value_groups) - query_states = query_states.transpose(1, 2) # (bsz, seq_len, n_heads, head_dim) - key_states = key_states.transpose(1, 2) # (bsz, seq_len, n_heads, head_dim) - value_states = value_states.transpose(1, 2) # (bsz, seq_len, n_heads, head_dim) + query_states = query_states.transpose(1, 2) # (bsz, seq_len, n_heads, head_dim) + key_states = key_states.transpose(1, 2) # (bsz, seq_len, n_heads, head_dim) + value_states = value_states.transpose(1, 2) # (bsz, seq_len, n_heads, head_dim) dropout_rate = self.attention_dropout if self.training else 0.0 @@ -144,14 +154,16 @@ def llama_flash_attn_forward( key_states = key_states.to(target_dtype) value_states = value_states.to(target_dtype) - if getattr(self.config, "group_size_ratio", None) and self.training: # shift + if getattr(self.config, "group_size_ratio", None) and self.training: # shift groupsz = int(q_len * getattr(self.config, "group_size_ratio")) assert q_len % groupsz == 0, "q_len {} should be divisible by group size {}.".format(q_len, groupsz) num_groups = q_len // groupsz + def shift(state: torch.Tensor) -> torch.Tensor: - state = torch.cat(( - state[:, :, :self.num_heads//2], state[:, :, self.num_heads//2:].roll(-groupsz//2, dims=1) - ), dim=2) + state = torch.cat( + (state[:, :, : self.num_heads // 2], state[:, :, self.num_heads // 2 :].roll(-groupsz // 2, dims=1)), + dim=2, + ) return state.reshape(bsz * num_groups, groupsz, self.num_heads, self.head_dim) query_states, key_states, value_states = shift(query_states), 
shift(key_states), shift(value_states) @@ -162,11 +174,14 @@ def llama_flash_attn_forward( query_states, key_states, value_states, attention_mask, q_len, dropout=dropout_rate ) - if getattr(self.config, "group_size_ratio", None) and self.training: # shift back + if getattr(self.config, "group_size_ratio", None) and self.training: # shift back attn_output.reshape(bsz, q_len, self.num_heads, self.head_dim) - attn_output = torch.cat(( - attn_output[:, :, :self.num_heads//2], attn_output[:, :, self.num_heads//2:].roll(groupsz//2, dims=1) - )) + attn_output = torch.cat( + ( + attn_output[:, :, : self.num_heads // 2], + attn_output[:, :, self.num_heads // 2 :].roll(groupsz // 2, dims=1), + ) + ) attn_output = attn_output.reshape(bsz, q_len, self.hidden_size).contiguous() attn_output = self.o_proj(attn_output) diff --git a/src/llmtuner/extras/ploting.py b/src/llmtuner/extras/ploting.py index 65b3bf42..462392f5 100644 --- a/src/llmtuner/extras/ploting.py +++ b/src/llmtuner/extras/ploting.py @@ -1,12 +1,14 @@ -import os -import math import json +import math +import os from typing import List, Optional + from transformers.trainer import TRAINER_STATE_NAME from .logging import get_logger from .packages import is_matplotlib_available + if is_matplotlib_available(): import matplotlib.pyplot as plt @@ -20,7 +22,7 @@ def smooth(scalars: List[float]) -> List[float]: """ last = scalars[0] smoothed = list() - weight = 1.8 * (1 / (1 + math.exp(-0.05 * len(scalars))) - 0.5) # a sigmoid function + weight = 1.8 * (1 / (1 + math.exp(-0.05 * len(scalars))) - 0.5) # a sigmoid function for next_val in scalars: smoothed_val = last * weight + (1 - weight) * next_val smoothed.append(smoothed_val) @@ -29,7 +31,6 @@ def smooth(scalars: List[float]) -> List[float]: def plot_loss(save_dictionary: os.PathLike, keys: Optional[List[str]] = ["loss"]) -> None: - with open(os.path.join(save_dictionary, TRAINER_STATE_NAME), "r", encoding="utf-8") as f: data = json.load(f) diff --git a/src/llmtuner/hparams/__init__.py b/src/llmtuner/hparams/__init__.py index 80deeb72..ef062ac6 100644 --- a/src/llmtuner/hparams/__init__.py +++ b/src/llmtuner/hparams/__init__.py @@ -3,7 +3,7 @@ from .evaluation_args import EvaluationArguments from .finetuning_args import FinetuningArguments from .generating_args import GeneratingArguments from .model_args import ModelArguments -from .parser import get_train_args, get_infer_args, get_eval_args +from .parser import get_eval_args, get_infer_args, get_train_args __all__ = [ @@ -14,5 +14,5 @@ __all__ = [ "ModelArguments", "get_train_args", "get_infer_args", - "get_eval_args" + "get_eval_args", ] diff --git a/src/llmtuner/hparams/data_args.py b/src/llmtuner/hparams/data_args.py index a635e47a..7af2f7f8 100644 --- a/src/llmtuner/hparams/data_args.py +++ b/src/llmtuner/hparams/data_args.py @@ -1,5 +1,5 @@ -from typing import Literal, Optional from dataclasses import dataclass, field +from typing import Literal, Optional @dataclass @@ -8,80 +8,66 @@ class DataArguments: Arguments pertaining to what data we are going to input our model for training and evaluation. """ template: Optional[str] = field( - default=None, - metadata={"help": "Which template to use for constructing prompts in training and inference."} + default=None, metadata={"help": "Which template to use for constructing prompts in training and inference."} ) dataset: Optional[str] = field( default=None, - metadata={"help": "The name of provided dataset(s) to use. 
Use commas to separate multiple datasets."} + metadata={"help": "The name of provided dataset(s) to use. Use commas to separate multiple datasets."}, ) dataset_dir: Optional[str] = field( - default="data", - metadata={"help": "Path to the folder containing the datasets."} + default="data", metadata={"help": "Path to the folder containing the datasets."} ) split: Optional[str] = field( - default="train", - metadata={"help": "Which dataset split to use for training and evaluation."} + default="train", metadata={"help": "Which dataset split to use for training and evaluation."} ) cutoff_len: Optional[int] = field( - default=1024, - metadata={"help": "The maximum length of the model inputs after tokenization."} + default=1024, metadata={"help": "The maximum length of the model inputs after tokenization."} ) reserved_label_len: Optional[int] = field( - default=1, - metadata={"help": "The maximum length reserved for label after tokenization."} + default=1, metadata={"help": "The maximum length reserved for label after tokenization."} ) train_on_prompt: Optional[bool] = field( - default=False, - metadata={"help": "Whether to disable the mask on the prompt or not."} - ) - streaming: Optional[bool] = field( - default=False, - metadata={"help": "Enable dataset streaming."} + default=False, metadata={"help": "Whether to disable the mask on the prompt or not."} ) + streaming: Optional[bool] = field(default=False, metadata={"help": "Enable dataset streaming."}) buffer_size: Optional[int] = field( - default=16384, - metadata={"help": "Size of the buffer to randomly sample examples from in dataset streaming."} + default=16384, metadata={"help": "Size of the buffer to randomly sample examples from in dataset streaming."} ) mix_strategy: Optional[Literal["concat", "interleave_under", "interleave_over"]] = field( default="concat", - metadata={"help": "Strategy to use in dataset mixing (concat/interleave) (undersampling/oversampling)."} + metadata={"help": "Strategy to use in dataset mixing (concat/interleave) (undersampling/oversampling)."}, ) interleave_probs: Optional[str] = field( default=None, - metadata={"help": "Probabilities to sample data from datasets. Use commas to separate multiple datasets."} + metadata={"help": "Probabilities to sample data from datasets. Use commas to separate multiple datasets."}, ) overwrite_cache: Optional[bool] = field( - default=False, - metadata={"help": "Overwrite the cached training and evaluation sets."} + default=False, metadata={"help": "Overwrite the cached training and evaluation sets."} ) preprocessing_num_workers: Optional[int] = field( - default=None, - metadata={"help": "The number of processes to use for the preprocessing."} + default=None, metadata={"help": "The number of processes to use for the preprocessing."} ) max_samples: Optional[int] = field( - default=None, - metadata={"help": "For debugging purposes, truncate the number of examples for each dataset."} + default=None, metadata={"help": "For debugging purposes, truncate the number of examples for each dataset."} ) eval_num_beams: Optional[int] = field( default=None, - metadata={"help": "Number of beams to use for evaluation. This argument will be passed to `model.generate`"} + metadata={"help": "Number of beams to use for evaluation. 
This argument will be passed to `model.generate`"}, ) ignore_pad_token_for_loss: Optional[bool] = field( default=True, - metadata={"help": "Whether to ignore the tokens corresponding to padded labels in the loss computation or not."} + metadata={ + "help": "Whether to ignore the tokens corresponding to padded labels in the loss computation or not." + }, ) val_size: Optional[float] = field( - default=0, - metadata={"help": "Size of the development set, should be an integer or a float in range `[0,1)`."} + default=0, metadata={"help": "Size of the development set, should be an integer or a float in range `[0,1)`."} ) sft_packing: Optional[bool] = field( - default=False, - metadata={"help": "Packing the questions and answers in the supervised fine-tuning stage."} + default=False, metadata={"help": "Packing the questions and answers in the supervised fine-tuning stage."} ) cache_path: Optional[str] = field( - default=None, - metadata={"help": "Path to save or load the preprocessed datasets."} + default=None, metadata={"help": "Path to save or load the preprocessed datasets."} ) def __post_init__(self): diff --git a/src/llmtuner/hparams/evaluation_args.py b/src/llmtuner/hparams/evaluation_args.py index c70103ed..ac1ceb68 100644 --- a/src/llmtuner/hparams/evaluation_args.py +++ b/src/llmtuner/hparams/evaluation_args.py @@ -1,6 +1,6 @@ import os -from typing import Literal, Optional from dataclasses import dataclass, field +from typing import Literal, Optional from datasets import DownloadMode @@ -10,36 +10,18 @@ class EvaluationArguments: r""" Arguments pertaining to specify the evaluation parameters. """ - task: str = field( - metadata={"help": "Name of the evaluation task."} - ) + task: str = field(metadata={"help": "Name of the evaluation task."}) task_dir: Optional[str] = field( - default="evaluation", - metadata={"help": "Path to the folder containing the evaluation datasets."} - ) - batch_size: Optional[int] = field( - default=4, - metadata={"help": "The batch size per GPU for evaluation."} - ) - seed: Optional[int] = field( - default=42, - metadata={"help": "Random seed to be used with data loaders."} - ) - lang: Optional[Literal["en", "zh"]] = field( - default="en", - metadata={"help": "Language used at evaluation."} - ) - n_shot: Optional[int] = field( - default=5, - metadata={"help": "Number of examplars for few-shot learning."} - ) - save_dir: Optional[str] = field( - default=None, - metadata={"help": "Path to save the evaluation results."} + default="evaluation", metadata={"help": "Path to the folder containing the evaluation datasets."} ) + batch_size: Optional[int] = field(default=4, metadata={"help": "The batch size per GPU for evaluation."}) + seed: Optional[int] = field(default=42, metadata={"help": "Random seed to be used with data loaders."}) + lang: Optional[Literal["en", "zh"]] = field(default="en", metadata={"help": "Language used at evaluation."}) + n_shot: Optional[int] = field(default=5, metadata={"help": "Number of examplars for few-shot learning."}) + save_dir: Optional[str] = field(default=None, metadata={"help": "Path to save the evaluation results."}) download_mode: Optional[DownloadMode] = field( default=DownloadMode.REUSE_DATASET_IF_EXISTS, - metadata={"help": "Download mode used for the evaluation datasets."} + metadata={"help": "Download mode used for the evaluation datasets."}, ) def __post_init__(self): diff --git a/src/llmtuner/hparams/finetuning_args.py b/src/llmtuner/hparams/finetuning_args.py index 35599f81..43e5e9e0 100644 --- 
a/src/llmtuner/hparams/finetuning_args.py +++ b/src/llmtuner/hparams/finetuning_args.py @@ -1,6 +1,6 @@ import json -from typing import Literal, Optional from dataclasses import asdict, dataclass, field +from typing import Literal, Optional @dataclass @@ -10,17 +10,18 @@ class FreezeArguments: """ name_module_trainable: Optional[str] = field( default="mlp", - metadata={"help": "Name of trainable modules for partial-parameter (freeze) fine-tuning. \ + metadata={ + "help": 'Name of trainable modules for partial-parameter (freeze) fine-tuning. \ Use commas to separate multiple modules. \ - LLaMA choices: [\"mlp\", \"self_attn\"], \ - BLOOM & Falcon & ChatGLM choices: [\"mlp\", \"self_attention\"], \ - Qwen choices: [\"mlp\", \"attn\"], \ - Phi choices: [\"mlp\", \"mixer\"], \ - Others choices: the same as LLaMA."} + LLaMA choices: ["mlp", "self_attn"], \ + BLOOM & Falcon & ChatGLM choices: ["mlp", "self_attention"], \ + Qwen choices: ["mlp", "attn"], \ + Phi choices: ["mlp", "mixer"], \ + Others choices: the same as LLaMA.' + }, ) num_layer_trainable: Optional[int] = field( - default=3, - metadata={"help": "The number of trainable layers for partial-parameter (freeze) fine-tuning."} + default=3, metadata={"help": "The number of trainable layers for partial-parameter (freeze) fine-tuning."} ) @@ -31,37 +32,32 @@ class LoraArguments: """ additional_target: Optional[str] = field( default=None, - metadata={"help": "Name(s) of modules apart from LoRA layers to be set as trainable and saved in the final checkpoint."} + metadata={ + "help": "Name(s) of modules apart from LoRA layers to be set as trainable and saved in the final checkpoint." + }, ) lora_alpha: Optional[int] = field( - default=None, - metadata={"help": "The scale factor for LoRA fine-tuning (default: lora_rank * 2)."} - ) - lora_dropout: Optional[float] = field( - default=0.0, - metadata={"help": "Dropout rate for the LoRA fine-tuning."} - ) - lora_rank: Optional[int] = field( - default=8, - metadata={"help": "The intrinsic dimension for LoRA fine-tuning."} + default=None, metadata={"help": "The scale factor for LoRA fine-tuning (default: lora_rank * 2)."} ) + lora_dropout: Optional[float] = field(default=0.0, metadata={"help": "Dropout rate for the LoRA fine-tuning."}) + lora_rank: Optional[int] = field(default=8, metadata={"help": "The intrinsic dimension for LoRA fine-tuning."}) lora_target: Optional[str] = field( default=None, - metadata={"help": "Name(s) of target modules to apply LoRA. Use commas to separate multiple modules. \ - LLaMA choices: [\"q_proj\", \"k_proj\", \"v_proj\", \"o_proj\", \"gate_proj\", \"up_proj\", \"down_proj\"], \ - BLOOM & Falcon & ChatGLM choices: [\"query_key_value\", \"dense\", \"dense_h_to_4h\", \"dense_4h_to_h\"], \ - Baichuan choices: [\"W_pack\", \"o_proj\", \"gate_proj\", \"up_proj\", \"down_proj\"], \ - Qwen choices: [\"c_attn\", \"attn.c_proj\", \"w1\", \"w2\", \"mlp.c_proj\"], \ - Phi choices: [\"Wqkv\", \"out_proj\", \"fc1\", \"fc2\"], \ - Others choices: the same as LLaMA."} + metadata={ + "help": 'Name(s) of target modules to apply LoRA. Use commas to separate multiple modules. 
\ + LLaMA choices: ["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"], \ + BLOOM & Falcon & ChatGLM choices: ["query_key_value", "dense", "dense_h_to_4h", "dense_4h_to_h"], \ + Baichuan choices: ["W_pack", "o_proj", "gate_proj", "up_proj", "down_proj"], \ + Qwen choices: ["c_attn", "attn.c_proj", "w1", "w2", "mlp.c_proj"], \ + Phi choices: ["Wqkv", "out_proj", "fc1", "fc2"], \ + Others choices: the same as LLaMA.' + }, ) lora_bf16_mode: Optional[bool] = field( - default=False, - metadata={"help": "Whether or not to train lora adapters in bf16 precision."} + default=False, metadata={"help": "Whether or not to train lora adapters in bf16 precision."} ) create_new_adapter: Optional[bool] = field( - default=False, - metadata={"help": "Whether or not to create a new adapter with randomly initialized weight."} + default=False, metadata={"help": "Whether or not to create a new adapter with randomly initialized weight."} ) @@ -70,69 +66,53 @@ class RLHFArguments: r""" Arguments pertaining to the PPO and DPO training. """ - dpo_beta: Optional[float] = field( - default=0.1, - metadata={"help": "The beta parameter for the DPO loss."} - ) + dpo_beta: Optional[float] = field(default=0.1, metadata={"help": "The beta parameter for the DPO loss."}) dpo_loss: Optional[Literal["sigmoid", "hinge", "ipo", "kto"]] = field( - default="sigmoid", - metadata={"help": "The type of DPO loss to use."} + default="sigmoid", metadata={"help": "The type of DPO loss to use."} ) dpo_ftx: Optional[float] = field( - default=0, - metadata={"help": "The supervised fine-tuning loss coefficient in DPO training."} + default=0, metadata={"help": "The supervised fine-tuning loss coefficient in DPO training."} ) ppo_buffer_size: Optional[int] = field( default=1, - metadata={"help": "The number of mini-batches to make experience buffer in a PPO optimization step."} + metadata={"help": "The number of mini-batches to make experience buffer in a PPO optimization step."}, ) ppo_epochs: Optional[int] = field( - default=4, - metadata={"help": "The number of epochs to perform in a PPO optimization step."} + default=4, metadata={"help": "The number of epochs to perform in a PPO optimization step."} ) ppo_logger: Optional[str] = field( - default=None, - metadata={"help": "Log with either \"wandb\" or \"tensorboard\" in PPO training."} + default=None, metadata={"help": 'Log with either "wandb" or "tensorboard" in PPO training.'} ) ppo_score_norm: Optional[bool] = field( - default=False, - metadata={"help": "Use score normalization in PPO training."} + default=False, metadata={"help": "Use score normalization in PPO training."} ) ppo_target: Optional[float] = field( - default=6.0, - metadata={"help": "Target KL value for adaptive KL control in PPO training."} + default=6.0, metadata={"help": "Target KL value for adaptive KL control in PPO training."} ) ppo_whiten_rewards: Optional[bool] = field( - default=False, - metadata={"help": "Whiten the rewards before compute advantages in PPO training."} + default=False, metadata={"help": "Whiten the rewards before compute advantages in PPO training."} ) ref_model: Optional[str] = field( - default=None, - metadata={"help": "Path to the reference model used for the PPO or DPO training."} + default=None, metadata={"help": "Path to the reference model used for the PPO or DPO training."} ) ref_model_adapters: Optional[str] = field( - default=None, - metadata={"help": "Path to the adapters of the reference model."} + default=None, metadata={"help": "Path to the adapters of the 
reference model."} ) ref_model_quantization_bit: Optional[int] = field( - default=None, - metadata={"help": "The number of bits to quantize the reference model."} + default=None, metadata={"help": "The number of bits to quantize the reference model."} ) reward_model: Optional[str] = field( - default=None, - metadata={"help": "Path to the reward model used for the PPO training."} + default=None, metadata={"help": "Path to the reward model used for the PPO training."} ) reward_model_adapters: Optional[str] = field( - default=None, - metadata={"help": "Path to the adapters of the reward model."} + default=None, metadata={"help": "Path to the adapters of the reward model."} ) reward_model_quantization_bit: Optional[int] = field( - default=None, - metadata={"help": "The number of bits to quantize the reward model."} + default=None, metadata={"help": "The number of bits to quantize the reward model."} ) reward_model_type: Optional[Literal["lora", "full", "api"]] = field( default="lora", - metadata={"help": "The type of the reward model in PPO training. Lora model only supports lora training."} + metadata={"help": "The type of the reward model in PPO training. Lora model only supports lora training."}, ) @@ -142,16 +122,13 @@ class FinetuningArguments(FreezeArguments, LoraArguments, RLHFArguments): Arguments pertaining to which techniques we are going to fine-tuning with. """ stage: Optional[Literal["pt", "sft", "rm", "ppo", "dpo"]] = field( - default="sft", - metadata={"help": "Which stage will be performed in training."} + default="sft", metadata={"help": "Which stage will be performed in training."} ) finetuning_type: Optional[Literal["lora", "freeze", "full"]] = field( - default="lora", - metadata={"help": "Which fine-tuning method to use."} + default="lora", metadata={"help": "Which fine-tuning method to use."} ) plot_loss: Optional[bool] = field( - default=False, - metadata={"help": "Whether or not to save the training loss curves."} + default=False, metadata={"help": "Whether or not to save the training loss curves."} ) def __post_init__(self): diff --git a/src/llmtuner/hparams/generating_args.py b/src/llmtuner/hparams/generating_args.py index c04a5c36..3df2749a 100644 --- a/src/llmtuner/hparams/generating_args.py +++ b/src/llmtuner/hparams/generating_args.py @@ -1,5 +1,5 @@ -from typing import Any, Dict, Optional from dataclasses import asdict, dataclass, field +from typing import Any, Dict, Optional @dataclass @@ -8,40 +8,37 @@ class GeneratingArguments: Arguments pertaining to specify the decoding parameters. """ do_sample: Optional[bool] = field( - default=True, - metadata={"help": "Whether or not to use sampling, use greedy decoding otherwise."} + default=True, metadata={"help": "Whether or not to use sampling, use greedy decoding otherwise."} ) temperature: Optional[float] = field( - default=0.95, - metadata={"help": "The value used to modulate the next token probabilities."} + default=0.95, metadata={"help": "The value used to modulate the next token probabilities."} ) top_p: Optional[float] = field( default=0.7, - metadata={"help": "The smallest set of most probable tokens with probabilities that add up to top_p or higher are kept."} + metadata={ + "help": "The smallest set of most probable tokens with probabilities that add up to top_p or higher are kept." 
+ }, ) top_k: Optional[int] = field( default=50, - metadata={"help": "The number of highest probability vocabulary tokens to keep for top-k filtering."} + metadata={"help": "The number of highest probability vocabulary tokens to keep for top-k filtering."}, ) num_beams: Optional[int] = field( - default=1, - metadata={"help": "Number of beams for beam search. 1 means no beam search."} + default=1, metadata={"help": "Number of beams for beam search. 1 means no beam search."} ) max_length: Optional[int] = field( default=512, - metadata={"help": "The maximum length the generated tokens can have. It can be overridden by max_new_tokens."} + metadata={"help": "The maximum length the generated tokens can have. It can be overridden by max_new_tokens."}, ) max_new_tokens: Optional[int] = field( default=512, - metadata={"help": "The maximum numbers of tokens to generate, ignoring the number of tokens in the prompt."} + metadata={"help": "The maximum numbers of tokens to generate, ignoring the number of tokens in the prompt."}, ) repetition_penalty: Optional[float] = field( - default=1.0, - metadata={"help": "The parameter for repetition penalty. 1.0 means no penalty."} + default=1.0, metadata={"help": "The parameter for repetition penalty. 1.0 means no penalty."} ) length_penalty: Optional[float] = field( - default=1.0, - metadata={"help": "Exponential penalty to the length that is used with beam-based generation."} + default=1.0, metadata={"help": "Exponential penalty to the length that is used with beam-based generation."} ) def to_dict(self) -> Dict[str, Any]: diff --git a/src/llmtuner/hparams/model_args.py b/src/llmtuner/hparams/model_args.py index 15703e3b..9737f707 100644 --- a/src/llmtuner/hparams/model_args.py +++ b/src/llmtuner/hparams/model_args.py @@ -1,5 +1,5 @@ -from typing import Any, Dict, Literal, Optional from dataclasses import asdict, dataclass, field +from typing import Any, Dict, Literal, Optional @dataclass @@ -11,108 +11,82 @@ class ModelArguments: metadata={"help": "Path to the model weight or identifier from huggingface.co/models or modelscope.cn/models."} ) adapter_name_or_path: Optional[str] = field( - default=None, - metadata={"help": "Path to the adapter weight or identifier from huggingface.co/models."} + default=None, metadata={"help": "Path to the adapter weight or identifier from huggingface.co/models."} ) cache_dir: Optional[str] = field( default=None, - metadata={"help": "Where to store the pre-trained models downloaded from huggingface.co or modelscope.cn."} + metadata={"help": "Where to store the pre-trained models downloaded from huggingface.co or modelscope.cn."}, ) use_fast_tokenizer: Optional[bool] = field( default=False, - metadata={"help": "Whether or not to use one of the fast tokenizer (backed by the tokenizers library)."} + metadata={"help": "Whether or not to use one of the fast tokenizer (backed by the tokenizers library)."}, ) resize_vocab: Optional[bool] = field( - default=False, - metadata={"help": "Whether or not to resize the tokenizer vocab and the embedding layers."} + default=False, metadata={"help": "Whether or not to resize the tokenizer vocab and the embedding layers."} ) split_special_tokens: Optional[bool] = field( default=False, - metadata={"help": "Whether or not the special tokens should be split during the tokenization process."} + metadata={"help": "Whether or not the special tokens should be split during the tokenization process."}, ) model_revision: Optional[str] = field( default="main", - metadata={"help": "The specific model 
version to use (can be a branch name, tag name or commit id)."} + metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."}, ) quantization_bit: Optional[int] = field( - default=None, - metadata={"help": "The number of bits to quantize the model."} + default=None, metadata={"help": "The number of bits to quantize the model."} ) quantization_type: Optional[Literal["fp4", "nf4"]] = field( - default="nf4", - metadata={"help": "Quantization data type to use in int4 training."} + default="nf4", metadata={"help": "Quantization data type to use in int4 training."} ) double_quantization: Optional[bool] = field( - default=True, - metadata={"help": "Whether or not to use double quantization in int4 training."} + default=True, metadata={"help": "Whether or not to use double quantization in int4 training."} ) rope_scaling: Optional[Literal["linear", "dynamic"]] = field( - default=None, - metadata={"help": "Which scaling strategy should be adopted for the RoPE embeddings."} + default=None, metadata={"help": "Which scaling strategy should be adopted for the RoPE embeddings."} ) flash_attn: Optional[bool] = field( - default=False, - metadata={"help": "Enable FlashAttention-2 for faster training."} + default=False, metadata={"help": "Enable FlashAttention-2 for faster training."} ) shift_attn: Optional[bool] = field( - default=False, - metadata={"help": "Enable shift short attention (S^2-Attn) proposed by LongLoRA."} + default=False, metadata={"help": "Enable shift short attention (S^2-Attn) proposed by LongLoRA."} ) use_unsloth: Optional[bool] = field( - default=False, - metadata={"help": "Whether or not to use unsloth's optimization for the LoRA training."} + default=False, metadata={"help": "Whether or not to use unsloth's optimization for the LoRA training."} ) disable_gradient_checkpointing: Optional[bool] = field( - default=False, - metadata={"help": "Whether or not to disable gradient checkpointing."} + default=False, metadata={"help": "Whether or not to disable gradient checkpointing."} ) upcast_layernorm: Optional[bool] = field( - default=False, - metadata={"help": "Whether or not to upcast the layernorm weights in fp32."} + default=False, metadata={"help": "Whether or not to upcast the layernorm weights in fp32."} ) upcast_lmhead_output: Optional[bool] = field( - default=False, - metadata={"help": "Whether or not to upcast the output of lm_head in fp32."} - ) - hf_hub_token: Optional[str] = field( - default=None, - metadata={"help": "Auth token to log in with Hugging Face Hub."} - ) - ms_hub_token: Optional[str] = field( - default=None, - metadata={"help": "Auth token to log in with ModelScope Hub."} + default=False, metadata={"help": "Whether or not to upcast the output of lm_head in fp32."} ) + hf_hub_token: Optional[str] = field(default=None, metadata={"help": "Auth token to log in with Hugging Face Hub."}) + ms_hub_token: Optional[str] = field(default=None, metadata={"help": "Auth token to log in with ModelScope Hub."}) export_dir: Optional[str] = field( - default=None, - metadata={"help": "Path to the directory to save the exported model."} + default=None, metadata={"help": "Path to the directory to save the exported model."} ) export_size: Optional[int] = field( - default=1, - metadata={"help": "The file shard size (in GB) of the exported model."} + default=1, metadata={"help": "The file shard size (in GB) of the exported model."} ) export_quantization_bit: Optional[int] = field( - default=None, - metadata={"help": "The number of bits to quantize 
the exported model."} + default=None, metadata={"help": "The number of bits to quantize the exported model."} ) export_quantization_dataset: Optional[str] = field( - default=None, - metadata={"help": "Path to the dataset or dataset name to use in quantizing the exported model."} + default=None, metadata={"help": "Path to the dataset or dataset name to use in quantizing the exported model."} ) export_quantization_nsamples: Optional[int] = field( - default=128, - metadata={"help": "The number of samples used for quantization."} + default=128, metadata={"help": "The number of samples used for quantization."} ) export_quantization_maxlen: Optional[int] = field( - default=1024, - metadata={"help": "The maximum length of the model inputs used for quantization."} + default=1024, metadata={"help": "The maximum length of the model inputs used for quantization."} ) export_legacy_format: Optional[bool] = field( - default=False, - metadata={"help": "Whether or not to save the `.bin` files instead of `.safetensors`."} + default=False, metadata={"help": "Whether or not to save the `.bin` files instead of `.safetensors`."} ) export_hub_model_id: Optional[str] = field( - default=None, - metadata={"help": "The name of the repository if push the model to the Hugging Face hub."} + default=None, metadata={"help": "The name of the repository if push the model to the Hugging Face hub."} ) def __post_init__(self): @@ -122,7 +96,7 @@ class ModelArguments: if self.split_special_tokens and self.use_fast_tokenizer: raise ValueError("`split_special_tokens` is only supported for slow tokenizers.") - if self.adapter_name_or_path is not None: # support merging multiple lora weights + if self.adapter_name_or_path is not None: # support merging multiple lora weights self.adapter_name_or_path = [path.strip() for path in self.adapter_name_or_path.split(",")] assert self.quantization_bit in [None, 8, 4], "We only accept 4-bit or 8-bit quantization." 
diff --git a/src/llmtuner/hparams/parser.py b/src/llmtuner/hparams/parser.py index a050b89a..0e434a1d 100644 --- a/src/llmtuner/hparams/parser.py +++ b/src/llmtuner/hparams/parser.py @@ -1,10 +1,11 @@ +import logging import os import sys -import torch -import logging -import datasets -import transformers from typing import Any, Dict, Optional, Tuple + +import datasets +import torch +import transformers from transformers import HfArgumentParser, Seq2SeqTrainingArguments from transformers.trainer_utils import get_last_checkpoint @@ -19,24 +20,12 @@ from .model_args import ModelArguments logger = get_logger(__name__) -_TRAIN_ARGS = [ - ModelArguments, DataArguments, Seq2SeqTrainingArguments, FinetuningArguments, GeneratingArguments -] -_TRAIN_CLS = Tuple[ - ModelArguments, DataArguments, Seq2SeqTrainingArguments, FinetuningArguments, GeneratingArguments -] -_INFER_ARGS = [ - ModelArguments, DataArguments, FinetuningArguments, GeneratingArguments -] -_INFER_CLS = Tuple[ - ModelArguments, DataArguments, FinetuningArguments, GeneratingArguments -] -_EVAL_ARGS = [ - ModelArguments, DataArguments, EvaluationArguments, FinetuningArguments -] -_EVAL_CLS = Tuple[ - ModelArguments, DataArguments, EvaluationArguments, FinetuningArguments -] +_TRAIN_ARGS = [ModelArguments, DataArguments, Seq2SeqTrainingArguments, FinetuningArguments, GeneratingArguments] +_TRAIN_CLS = Tuple[ModelArguments, DataArguments, Seq2SeqTrainingArguments, FinetuningArguments, GeneratingArguments] +_INFER_ARGS = [ModelArguments, DataArguments, FinetuningArguments, GeneratingArguments] +_INFER_CLS = Tuple[ModelArguments, DataArguments, FinetuningArguments, GeneratingArguments] +_EVAL_ARGS = [ModelArguments, DataArguments, EvaluationArguments, FinetuningArguments] +_EVAL_CLS = Tuple[ModelArguments, DataArguments, EvaluationArguments, FinetuningArguments] def _parse_args(parser: "HfArgumentParser", args: Optional[Dict[str, Any]] = None) -> Tuple[Any]: @@ -77,7 +66,7 @@ def _verify_model_args(model_args: "ModelArguments", finetuning_args: "Finetunin if model_args.adapter_name_or_path is not None and len(model_args.adapter_name_or_path) != 1: if finetuning_args.finetuning_type != "lora": raise ValueError("Multiple adapters are only available for LoRA tuning.") - + if model_args.quantization_bit is not None: raise ValueError("Quantized model only accepts a single adapter. Merge them first.") @@ -181,18 +170,22 @@ def get_train_args(args: Optional[Dict[str, Any]] = None) -> _TRAIN_CLS: training_args_dict = training_args.to_dict() training_args_dict.update(dict(resume_from_checkpoint=last_checkpoint)) training_args = Seq2SeqTrainingArguments(**training_args_dict) - logger.info("Resuming training from {}. Change `output_dir` or use `overwrite_output_dir` to avoid.".format( - training_args.resume_from_checkpoint - )) + logger.info( + "Resuming training from {}. 
Change `output_dir` or use `overwrite_output_dir` to avoid.".format( + training_args.resume_from_checkpoint + ) + ) if ( finetuning_args.stage in ["rm", "ppo"] and finetuning_args.finetuning_type == "lora" and training_args.resume_from_checkpoint is not None ): - logger.warning("Add {} to `adapter_name_or_path` to resume training from checkpoint.".format( - training_args.resume_from_checkpoint - )) + logger.warning( + "Add {} to `adapter_name_or_path` to resume training from checkpoint.".format( + training_args.resume_from_checkpoint + ) + ) # postprocess model_args model_args.compute_dtype = ( @@ -201,10 +194,15 @@ def get_train_args(args: Optional[Dict[str, Any]] = None) -> _TRAIN_CLS: model_args.model_max_length = data_args.cutoff_len # Log on each process the small summary: - logger.info("Process rank: {}, device: {}, n_gpu: {}\n distributed training: {}, compute dtype: {}".format( - training_args.local_rank, training_args.device, training_args.n_gpu, - bool(training_args.local_rank != -1), str(model_args.compute_dtype) - )) + logger.info( + "Process rank: {}, device: {}, n_gpu: {}\n distributed training: {}, compute dtype: {}".format( + training_args.local_rank, + training_args.device, + training_args.n_gpu, + bool(training_args.local_rank != -1), + str(model_args.compute_dtype), + ) + ) logger.info(f"Training/evaluation parameters {training_args}") # Set seed before initializing model. diff --git a/src/llmtuner/model/adapter.py b/src/llmtuner/model/adapter.py index a7e9b9fe..c4a1a3b9 100644 --- a/src/llmtuner/model/adapter.py +++ b/src/llmtuner/model/adapter.py @@ -1,25 +1,25 @@ -import torch import inspect from typing import TYPE_CHECKING + +import torch +from peft import LoraConfig, PeftModel, TaskType, get_peft_model from transformers.integrations import is_deepspeed_zero3_enabled -from peft import PeftModel, TaskType, LoraConfig, get_peft_model from ..extras.logging import get_logger from .utils import find_all_linear_modules + if TYPE_CHECKING: from transformers.modeling_utils import PreTrainedModel - from ..hparams import ModelArguments, FinetuningArguments + + from ..hparams import FinetuningArguments, ModelArguments logger = get_logger(__name__) def init_adapter( - model: "PreTrainedModel", - model_args: "ModelArguments", - finetuning_args: "FinetuningArguments", - is_trainable: bool + model: "PreTrainedModel", model_args: "ModelArguments", finetuning_args: "FinetuningArguments", is_trainable: bool ) -> "PreTrainedModel": r""" Initializes the adapters. 
@@ -47,10 +47,10 @@ def init_adapter( if not num_layers: raise ValueError("Current model does not support freeze tuning.") - if finetuning_args.num_layer_trainable > 0: # fine-tuning the last n layers if num_layer_trainable > 0 + if finetuning_args.num_layer_trainable > 0: # fine-tuning the last n layers if num_layer_trainable > 0 trainable_layer_ids = [num_layers - k - 1 for k in range(finetuning_args.num_layer_trainable)] - else: # fine-tuning the first n layers if num_layer_trainable < 0 - trainable_layer_ids = [k for k in range(-finetuning_args.num_layer_trainable)] + else: # fine-tuning the first n layers if num_layer_trainable < 0 + trainable_layer_ids = [k for k in range(-finetuning_args.num_layer_trainable)] # noqa: C416 trainable_layers = [] for module_name in finetuning_args.name_module_trainable: @@ -69,7 +69,7 @@ def init_adapter( if model_args.adapter_name_or_path is not None: is_mergeable = True - if getattr(model, "quantization_method", None): # merge lora in quantized model is unstable + if getattr(model, "quantization_method", None): # merge lora in quantized model is unstable assert len(model_args.adapter_name_or_path) == 1, "Quantized model only accepts a single adapter." is_mergeable = False @@ -90,10 +90,10 @@ def init_adapter( if len(adapter_to_merge) > 0: logger.info("Merged {} adapter(s).".format(len(adapter_to_merge))) - if adapter_to_resume is not None: # resume lora training + if adapter_to_resume is not None: # resume lora training model = PeftModel.from_pretrained(model, adapter_to_resume, is_trainable=is_trainable) - if is_trainable and adapter_to_resume is None: # create new lora weights while training + if is_trainable and adapter_to_resume is None: # create new lora weights while training if len(finetuning_args.lora_target) == 1 and finetuning_args.lora_target[0] == "all": target_modules = find_all_linear_modules(model) else: @@ -103,11 +103,12 @@ def init_adapter( "r": finetuning_args.lora_rank, "target_modules": target_modules, "lora_alpha": finetuning_args.lora_alpha, - "lora_dropout": finetuning_args.lora_dropout + "lora_dropout": finetuning_args.lora_dropout, } if model_args.use_unsloth: - from unsloth import FastLlamaModel, FastMistralModel # type: ignore + from unsloth import FastLlamaModel, FastMistralModel # type: ignore + unsloth_peft_kwargs = {"model": model, "max_seq_length": model_args.model_max_length} if "loftq_config" in inspect.signature(FastLlamaModel.get_peft_model).parameters: unsloth_peft_kwargs["loftq_config"] = {} @@ -124,7 +125,7 @@ def init_adapter( task_type=TaskType.CAUSAL_LM, inference_mode=False, modules_to_save=finetuning_args.additional_target, - **peft_kwargs + **peft_kwargs, ) model = get_peft_model(model, lora_config) diff --git a/src/llmtuner/model/loader.py b/src/llmtuner/model/loader.py index adc45ea8..b02a4560 100644 --- a/src/llmtuner/model/loader.py +++ b/src/llmtuner/model/loader.py @@ -1,4 +1,5 @@ from typing import TYPE_CHECKING, Optional, Tuple + from transformers import AutoConfig, AutoModelForCausalLM, AutoTokenizer from transformers.integrations import is_deepspeed_zero3_enabled from transformers.utils.versions import require_version @@ -7,12 +8,14 @@ from trl import AutoModelForCausalLMWithValueHead from ..extras.logging import get_logger from ..extras.misc import count_parameters, get_current_device, try_download_model_from_ms from .adapter import init_adapter -from .patcher import patch_config, patch_tokenizer, patch_model, patch_valuehead_model +from .patcher import patch_config, patch_model, 
patch_tokenizer, patch_valuehead_model from .utils import load_valuehead_params, register_autoclass + if TYPE_CHECKING: from transformers import PreTrainedModel, PreTrainedTokenizer - from ..hparams import ModelArguments, FinetuningArguments + + from ..hparams import FinetuningArguments, ModelArguments logger = get_logger(__name__) @@ -29,7 +32,7 @@ def load_model_and_tokenizer( model_args: "ModelArguments", finetuning_args: "FinetuningArguments", is_trainable: Optional[bool] = False, - add_valuehead: Optional[bool] = False + add_valuehead: Optional[bool] = False, ) -> Tuple["PreTrainedModel", "PreTrainedTokenizer"]: r""" Loads pretrained model and tokenizer. @@ -43,7 +46,7 @@ def load_model_and_tokenizer( "trust_remote_code": True, "cache_dir": model_args.cache_dir, "revision": model_args.model_revision, - "token": model_args.hf_hub_token + "token": model_args.hf_hub_token, } tokenizer = AutoTokenizer.from_pretrained( @@ -51,7 +54,7 @@ def load_model_and_tokenizer( use_fast=model_args.use_fast_tokenizer, split_special_tokens=model_args.split_special_tokens, padding_side="right", - **config_kwargs + **config_kwargs, ) patch_tokenizer(tokenizer) @@ -61,7 +64,8 @@ def load_model_and_tokenizer( model = None if is_trainable and model_args.use_unsloth: require_version("unsloth", "Follow the instructions at: https://github.com/unslothai/unsloth") - from unsloth import FastLlamaModel, FastMistralModel # type: ignore + from unsloth import FastLlamaModel, FastMistralModel # type: ignore + unsloth_kwargs = { "model_name": model_args.model_name_or_path, "max_seq_length": model_args.model_max_length, @@ -69,7 +73,7 @@ def load_model_and_tokenizer( "load_in_4bit": model_args.quantization_bit == 4, "token": model_args.hf_hub_token, "device_map": get_current_device(), - "rope_scaling": getattr(config, "rope_scaling", None) + "rope_scaling": getattr(config, "rope_scaling", None), } if getattr(config, "model_type", None) == "llama": model, _ = FastLlamaModel.from_pretrained(**unsloth_kwargs) @@ -89,7 +93,7 @@ def load_model_and_tokenizer( config=config, torch_dtype=model_args.compute_dtype, low_cpu_mem_usage=(not is_deepspeed_zero3_enabled()), - **config_kwargs + **config_kwargs, ) patch_model(model, tokenizer, model_args, is_trainable) @@ -119,9 +123,11 @@ def load_model_and_tokenizer( model.train() trainable_params, all_param = count_parameters(model) - logger.info("trainable params: {:d} || all params: {:d} || trainable%: {:.4f}".format( - trainable_params, all_param, 100 * trainable_params / all_param - )) + logger.info( + "trainable params: {:d} || all params: {:d} || trainable%: {:.4f}".format( + trainable_params, all_param, 100 * trainable_params / all_param + ) + ) if not is_trainable: logger.info("This IS expected that the trainable params is 0 if you are using model for inference only.") diff --git a/src/llmtuner/model/patcher.py b/src/llmtuner/model/patcher.py index 5af9f5af..8a373760 100644 --- a/src/llmtuner/model/patcher.py +++ b/src/llmtuner/model/patcher.py @@ -1,12 +1,12 @@ -import os import math -import torch +import os import random +from contextlib import nullcontext from types import MethodType from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple -from datasets import load_dataset -from contextlib import nullcontext +import torch +from datasets import load_dataset from transformers import BitsAndBytesConfig, GPTQConfig, PreTrainedModel, PreTrainedTokenizerBase from transformers.integrations import is_deepspeed_zero3_enabled from transformers.utils.versions import 
require_version @@ -17,9 +17,11 @@ from ..extras.misc import get_current_device, infer_optim_dtype from ..extras.packages import is_flash_attn2_available from ..extras.patches.llama_patch import apply_llama_patch + if TYPE_CHECKING: from transformers import PretrainedConfig, PreTrainedTokenizer from trl import AutoModelForCausalLMWithValueHead + from ..hparams import ModelArguments @@ -40,7 +42,8 @@ def _resize_embedding_layer(model: "PreTrainedModel", tokenizer: "PreTrainedToke Resize token embeddings. """ if is_deepspeed_zero3_enabled(): - import deepspeed # type: ignore + import deepspeed # type: ignore + params = [model.get_input_embeddings().weight] if model.get_output_embeddings() is not None and not model.config.tie_word_embeddings: params.append(model.get_output_embeddings().weight) @@ -88,7 +91,7 @@ def _get_quantization_dataset(tokenizer: "PreTrainedTokenizer", model_args: "Mod sample_idx = random.randint(0, len(dataset) - 1) sample: Dict[str, torch.Tensor] = tokenizer(dataset[sample_idx]["text"], return_tensors="pt") if sample["input_ids"].size(1) >= maxlen: - break # TODO: fix large maxlen + break # TODO: fix large maxlen word_idx = random.randint(0, sample["input_ids"].size(1) - maxlen - 1) input_ids = sample["input_ids"][:, word_idx : word_idx + maxlen] @@ -119,9 +122,9 @@ def _configure_rope(config: "PretrainedConfig", model_args: "ModelArguments", is scaling_factor = 2.0 setattr(config, "rope_scaling", {"type": model_args.rope_scaling, "factor": scaling_factor}) - logger.info("Using {} scaling strategy and setting scaling factor to {}".format( - model_args.rope_scaling, scaling_factor - )) + logger.info( + "Using {} scaling strategy and setting scaling factor to {}".format(model_args.rope_scaling, scaling_factor) + ) def _configure_flashattn(config_kwargs: Dict[str, Any]) -> None: @@ -146,22 +149,22 @@ def _configure_quantization( config: "PretrainedConfig", tokenizer: "PreTrainedTokenizer", model_args: "ModelArguments", - config_kwargs: Dict[str, Any] + config_kwargs: Dict[str, Any], ) -> None: r""" Priority: GPTQ-quantized (training) > AutoGPTQ (export) > Bitsandbytes (training) """ - if getattr(config, "quantization_config", None): # gptq + if getattr(config, "quantization_config", None): # gptq if is_deepspeed_zero3_enabled(): raise ValueError("DeepSpeed ZeRO-3 is incompatible with quantization.") config_kwargs["device_map"] = {"": get_current_device()} quantization_config: Dict[str, Any] = getattr(config, "quantization_config", None) if quantization_config.get("quant_method", None) == "gptq" and quantization_config.get("bits", -1) == 4: - quantization_config["use_exllama"] = False # disable exllama + quantization_config["use_exllama"] = False # disable exllama logger.info("Loading {}-bit GPTQ-quantized model.".format(quantization_config.get("bits", -1))) - elif model_args.export_quantization_bit is not None: # auto-gptq + elif model_args.export_quantization_bit is not None: # auto-gptq require_version("optimum>=1.16.0", "To fix: pip install optimum>=1.16.0") require_version("auto_gptq>=0.5.0", "To fix: pip install auto_gptq>=0.5.0") from accelerate.utils import get_max_memory @@ -172,13 +175,13 @@ def _configure_quantization( config_kwargs["quantization_config"] = GPTQConfig( bits=model_args.export_quantization_bit, tokenizer=tokenizer, - dataset=_get_quantization_dataset(tokenizer, model_args) + dataset=_get_quantization_dataset(tokenizer, model_args), ) config_kwargs["device_map"] = "auto" config_kwargs["max_memory"] = get_max_memory() logger.info("Quantizing model 
to {} bit.".format(model_args.export_quantization_bit)) - elif model_args.quantization_bit is not None: # bnb + elif model_args.quantization_bit is not None: # bnb if is_deepspeed_zero3_enabled(): raise ValueError("DeepSpeed ZeRO-3 is incompatible with quantization.") @@ -192,7 +195,7 @@ def _configure_quantization( load_in_4bit=True, bnb_4bit_compute_dtype=model_args.compute_dtype, bnb_4bit_use_double_quant=model_args.double_quantization, - bnb_4bit_quant_type=model_args.quantization_type + bnb_4bit_quant_type=model_args.quantization_type, ) config_kwargs["device_map"] = {"": get_current_device()} @@ -200,9 +203,7 @@ def _configure_quantization( def _prepare_model_for_training( - model: "PreTrainedModel", - model_args: "ModelArguments", - output_layer_name: Optional[str] = "lm_head" + model: "PreTrainedModel", model_args: "ModelArguments", output_layer_name: Optional[str] = "lm_head" ) -> None: r""" Includes: @@ -222,10 +223,11 @@ def _prepare_model_for_training( logger.warning("Current model does not support gradient checkpointing.") else: model.gradient_checkpointing_enable(gradient_checkpointing_kwargs={"use_reentrant": False}) - model.config.use_cache = False # turn off when gradient checkpointing is enabled + model.config.use_cache = False # turn off when gradient checkpointing is enabled logger.info("Gradient checkpointing enabled.") if hasattr(model, output_layer_name) and model_args.upcast_lmhead_output: + def fp32_forward_post_hook(module: torch.nn.Module, args: Tuple[torch.Tensor], output: torch.Tensor): return output.to(torch.float32) @@ -244,9 +246,9 @@ def patch_config( tokenizer: "PreTrainedTokenizer", model_args: "ModelArguments", config_kwargs: Dict[str, Any], - is_trainable: bool + is_trainable: bool, ) -> None: - if model_args.compute_dtype is None: # priority: bf16 > fp16 > fp32 + if model_args.compute_dtype is None: # priority: bf16 > fp16 > fp32 model_args.compute_dtype = infer_optim_dtype(model_dtype=getattr(config, "torch_dtype", None)) if getattr(config, "model_type", None) == "qwen": @@ -266,10 +268,7 @@ def patch_config( def patch_model( - model: "PreTrainedModel", - tokenizer: "PreTrainedTokenizer", - model_args: "ModelArguments", - is_trainable: bool + model: "PreTrainedModel", tokenizer: "PreTrainedTokenizer", model_args: "ModelArguments", is_trainable: bool ) -> None: if "GenerationMixin" not in str(model.generate.__func__): model.generate = MethodType(PreTrainedModel.generate, model) diff --git a/src/llmtuner/model/utils.py b/src/llmtuner/model/utils.py index ba4478fb..7ff7aa34 100644 --- a/src/llmtuner/model/utils.py +++ b/src/llmtuner/model/utils.py @@ -1,16 +1,19 @@ -import torch import inspect from typing import TYPE_CHECKING, Any, Dict, List + +import torch from transformers import PreTrainedModel from transformers.utils import cached_file -from ..extras.constants import V_HEAD_WEIGHTS_NAME, V_HEAD_SAFE_WEIGHTS_NAME +from ..extras.constants import V_HEAD_SAFE_WEIGHTS_NAME, V_HEAD_WEIGHTS_NAME from ..extras.logging import get_logger from ..extras.misc import get_current_device + if TYPE_CHECKING: from transformers import PretrainedConfig, PreTrainedTokenizer - from ..hparams import ModelArguments, DataArguments, FinetuningArguments + + from ..hparams import DataArguments, FinetuningArguments, ModelArguments logger = get_logger(__name__) @@ -21,7 +24,7 @@ def dispatch_model(model: "PreTrainedModel") -> "PreTrainedModel": Dispatches a pre-trained model to GPUs with balanced memory when the GPU is available. 
Borrowed from: https://github.com/huggingface/transformers/blob/v4.36.2/src/transformers/modeling_utils.py#L3570 """ - if getattr(model, "quantization_method", None): # already set on current device + if getattr(model, "quantization_method", None): # already set on current device return model if ( @@ -31,7 +34,7 @@ def dispatch_model(model: "PreTrainedModel") -> "PreTrainedModel": and model.config.model_type != "chatglm" ): from accelerate import dispatch_model - from accelerate.utils import infer_auto_device_map, get_balanced_memory + from accelerate.utils import get_balanced_memory, infer_auto_device_map kwargs = {"dtype": model.dtype, "no_split_module_classes": model._get_no_split_modules("auto")} max_memory = get_balanced_memory(model, **kwargs) @@ -55,6 +58,7 @@ def find_all_linear_modules(model: "PreTrainedModel") -> List[str]: linear_cls = torch.nn.Linear elif quantization_method == "bitsandbytes": import bitsandbytes as bnb + linear_cls = bnb.nn.Linear4bit if getattr(model, "is_loaded_in_4bit", False) else bnb.nn.Linear8bitLt else: raise ValueError("Finding linear modules for {} models is not supported.".format(quantization_method)) @@ -65,10 +69,7 @@ def find_all_linear_modules(model: "PreTrainedModel") -> List[str]: module_names = set() for name, module in model.named_modules(): - if ( - isinstance(module, linear_cls) - and not any([output_layer in name for output_layer in output_layer_names]) - ): + if isinstance(module, linear_cls) and not any(output_layer in name for output_layer in output_layer_names): module_names.add(name.split(".")[-1]) logger.info("Found linear modules: {}".format(",".join(module_names))) @@ -76,16 +77,14 @@ def find_all_linear_modules(model: "PreTrainedModel") -> List[str]: def get_modelcard_args( - model_args: "ModelArguments", - data_args: "DataArguments", - finetuning_args: "FinetuningArguments" + model_args: "ModelArguments", data_args: "DataArguments", finetuning_args: "FinetuningArguments" ) -> Dict[str, Any]: return { "tasks": "text-generation", "license": "other", "finetuned_from": model_args.model_name_or_path, "dataset": [dataset.strip() for dataset in data_args.dataset.split(",")], - "tags": ["llama-factory"] + (["lora"] if finetuning_args.finetuning_type == "lora" else []) + "tags": ["llama-factory"] + (["lora"] if finetuning_args.finetuning_type == "lora" else []), } @@ -95,14 +94,11 @@ def load_valuehead_params(path_or_repo_id: str, model_args: "ModelArguments") -> Returns: dict with keys `v_head.summary.weight` and `v_head.summary.bias`. 
""" - kwargs = { - "path_or_repo_id": path_or_repo_id, - "cache_dir": model_args.cache_dir, - "token": model_args.hf_hub_token - } + kwargs = {"path_or_repo_id": path_or_repo_id, "cache_dir": model_args.cache_dir, "token": model_args.hf_hub_token} try: from safetensors import safe_open + vhead_file = cached_file(filename=V_HEAD_SAFE_WEIGHTS_NAME, **kwargs) with safe_open(vhead_file, framework="pt", device="cpu") as f: return {key: f.get_tensor(key) for key in f.keys()} diff --git a/src/llmtuner/train/dpo/collator.py b/src/llmtuner/train/dpo/collator.py index 5c862b4f..7e8ba1c5 100644 --- a/src/llmtuner/train/dpo/collator.py +++ b/src/llmtuner/train/dpo/collator.py @@ -1,6 +1,7 @@ -import torch from dataclasses import dataclass from typing import Any, Dict, List, Sequence, Tuple + +import torch from transformers import DataCollatorForSeq2Seq @@ -20,7 +21,7 @@ class DPODataCollatorWithPadding(DataCollatorForSeq2Seq): padded_tensor = self.label_pad_token_id * torch.ones_like(feature) padded_tensor[start:end] = feature[start:end] padded_labels.append(padded_tensor) - return torch.stack(padded_labels, dim=0).contiguous() # in contiguous memory + return torch.stack(padded_labels, dim=0).contiguous() # in contiguous memory def __call__(self, features: Sequence[Dict[str, Any]]) -> Dict[str, torch.Tensor]: r""" @@ -34,10 +35,12 @@ class DPODataCollatorWithPadding(DataCollatorForSeq2Seq): for key in ("chosen_ids", "rejected_ids"): for feature in features: prompt_len, answer_len = len(feature["prompt_ids"]), len(feature[key]) - concatenated_features.append({ - "input_ids": feature["prompt_ids"] + feature[key], - "attention_mask": [1] * (prompt_len + answer_len) - }) + concatenated_features.append( + { + "input_ids": feature["prompt_ids"] + feature[key], + "attention_mask": [1] * (prompt_len + answer_len), + } + ) label_positions.append((prompt_len, answer_len)) batch = self.tokenizer.pad( diff --git a/src/llmtuner/train/dpo/trainer.py b/src/llmtuner/train/dpo/trainer.py index b8d59c8e..2ea4707c 100644 --- a/src/llmtuner/train/dpo/trainer.py +++ b/src/llmtuner/train/dpo/trainer.py @@ -1,19 +1,20 @@ -import torch -from contextlib import nullcontext from collections import defaultdict +from contextlib import nullcontext from typing import TYPE_CHECKING, Dict, Literal, Optional, Tuple, Union + +import torch from transformers import BatchEncoding, Trainer from trl import DPOTrainer from trl.trainer.utils import disable_dropout_in_model from ...extras.constants import IGNORE_INDEX + if TYPE_CHECKING: from transformers import PreTrainedModel class CustomDPOTrainer(DPOTrainer): - def __init__( self, beta: float, @@ -22,15 +23,15 @@ class CustomDPOTrainer(DPOTrainer): model: Union["PreTrainedModel", torch.nn.Module], ref_model: Optional[Union["PreTrainedModel", torch.nn.Module]] = None, disable_dropout: Optional[bool] = True, - **kwargs + **kwargs, ): if disable_dropout: disable_dropout_in_model(model) if ref_model is not None: disable_dropout_in_model(ref_model) - self.use_dpo_data_collator = True # hack to avoid warning - self.generate_during_eval = False # disable at evaluation + self.use_dpo_data_collator = True # hack to avoid warning + self.generate_during_eval = False # disable at evaluation self.label_pad_token_id = IGNORE_INDEX self.padding_value = 0 self.is_encoder_decoder = model.config.is_encoder_decoder @@ -53,42 +54,29 @@ class CustomDPOTrainer(DPOTrainer): if ref_model is not None: if self.is_deepspeed_enabled: if not ( - getattr(ref_model, "is_loaded_in_8bit", False) - or getattr(ref_model, 
"is_loaded_in_4bit", False) - ): # quantized models are already set on the correct device + getattr(ref_model, "is_loaded_in_8bit", False) or getattr(ref_model, "is_loaded_in_4bit", False) + ): # quantized models are already set on the correct device self.ref_model = self._prepare_deepspeed(self.ref_model) else: self.ref_model = self.accelerator.prepare_model(self.ref_model, evaluation_mode=True) - def sft_loss( - self, - chosen_logits: torch.FloatTensor, - chosen_labels: torch.LongTensor - ) -> torch.Tensor: + def sft_loss(self, chosen_logits: torch.FloatTensor, chosen_labels: torch.LongTensor) -> torch.Tensor: r""" Computes supervised cross-entropy loss of given labels under the given logits. Returns: A tensor of shape (batch_size,) containing the cross-entropy loss of each samples. """ - all_logps = self.get_batch_logps( - chosen_logits, - chosen_labels, - average_log_prob=True - ) + all_logps = self.get_batch_logps(chosen_logits, chosen_labels, average_log_prob=True) return -all_logps def concatenated_forward( - self, - model: "PreTrainedModel", - batch: Dict[str, torch.Tensor] + self, model: "PreTrainedModel", batch: Dict[str, torch.Tensor] ) -> Tuple[torch.FloatTensor, torch.FloatTensor, torch.FloatTensor, torch.FloatTensor]: - batch_copied = BatchEncoding({k: v.detach().clone() for k, v in batch.items()}) # avoid error + batch_copied = BatchEncoding({k: v.detach().clone() for k, v in batch.items()}) # avoid error all_logits = model( - input_ids=batch_copied["input_ids"], - attention_mask=batch_copied["attention_mask"], - return_dict=True + input_ids=batch_copied["input_ids"], attention_mask=batch_copied["attention_mask"], return_dict=True ).logits.to(torch.float32) all_logps = self.get_batch_logps( @@ -106,7 +94,7 @@ class CustomDPOTrainer(DPOTrainer): self, model: "PreTrainedModel", batch: Dict[str, torch.Tensor], - train_eval: Optional[Literal["train", "eval"]] = "train" + train_eval: Optional[Literal["train", "eval"]] = "train", ) -> Tuple[torch.Tensor, Dict[str, torch.Tensor]]: r""" Computes the DPO loss and other metrics for the given batch of inputs for train or test. 
diff --git a/src/llmtuner/train/dpo/workflow.py b/src/llmtuner/train/dpo/workflow.py index 3868d1d9..9ea3b617 100644 --- a/src/llmtuner/train/dpo/workflow.py +++ b/src/llmtuner/train/dpo/workflow.py @@ -1,6 +1,7 @@ # Inspired by: https://github.com/huggingface/trl/blob/main/examples/research_projects/stack_llama_2/scripts/dpo_llama2.py -from typing import TYPE_CHECKING, Optional, List +from typing import TYPE_CHECKING, List, Optional + from transformers import Seq2SeqTrainingArguments from ...data import get_dataset, split_dataset @@ -12,8 +13,10 @@ from ...train.dpo.collator import DPODataCollatorWithPadding from ...train.dpo.trainer import CustomDPOTrainer from ...train.utils import create_modelcard_and_push, create_ref_model + if TYPE_CHECKING: from transformers import TrainerCallback + from ...hparams import DataArguments, FinetuningArguments @@ -22,25 +25,25 @@ def run_dpo( data_args: "DataArguments", training_args: "Seq2SeqTrainingArguments", finetuning_args: "FinetuningArguments", - callbacks: Optional[List["TrainerCallback"]] = None + callbacks: Optional[List["TrainerCallback"]] = None, ): model, tokenizer = load_model_and_tokenizer(model_args, finetuning_args, training_args.do_train) dataset = get_dataset(tokenizer, model_args, data_args, training_args, stage="rm") data_collator = DPODataCollatorWithPadding( tokenizer=tokenizer, pad_to_multiple_of=8, - label_pad_token_id=IGNORE_INDEX if data_args.ignore_pad_token_for_loss else tokenizer.pad_token_id + label_pad_token_id=IGNORE_INDEX if data_args.ignore_pad_token_for_loss else tokenizer.pad_token_id, ) # Create reference model - if finetuning_args.ref_model is None and (not training_args.do_train): # use the model itself + if finetuning_args.ref_model is None and (not training_args.do_train): # use the model itself ref_model = model else: ref_model = create_ref_model(model_args, finetuning_args) # Update arguments training_args_dict = training_args.to_dict() - training_args_dict.update(dict(remove_unused_columns=False)) # important for pairwise dataset + training_args_dict.update(dict(remove_unused_columns=False)) # important for pairwise dataset training_args = Seq2SeqTrainingArguments(**training_args_dict) # Initialize our Trainer @@ -54,7 +57,7 @@ def run_dpo( tokenizer=tokenizer, data_collator=data_collator, callbacks=callbacks, - **split_dataset(dataset, data_args, training_args) + **split_dataset(dataset, data_args, training_args), ) # Training @@ -70,7 +73,7 @@ def run_dpo( # Evaluation if training_args.do_eval: metrics = trainer.evaluate(metric_key_prefix="eval") - if id(model) == id(ref_model): # unable to compute rewards without a reference model + if id(model) == id(ref_model): # unable to compute rewards without a reference model remove_keys = [key for key in metrics.keys() if "rewards" in key] for key in remove_keys: metrics.pop(key) diff --git a/src/llmtuner/train/ppo/trainer.py b/src/llmtuner/train/ppo/trainer.py index 8b2116ea..9e88550b 100644 --- a/src/llmtuner/train/ppo/trainer.py +++ b/src/llmtuner/train/ppo/trainer.py @@ -1,27 +1,28 @@ +import math import os import sys -import math -import torch -from tqdm import tqdm from typing import TYPE_CHECKING, Dict, List, Optional, Tuple -from transformers import GenerationConfig, Trainer, TrainerState, TrainerControl -from transformers.utils import WEIGHTS_NAME, SAFE_WEIGHTS_NAME -from transformers.trainer_utils import PREFIX_CHECKPOINT_DIR +import torch +from tqdm import tqdm +from transformers import GenerationConfig, Trainer, TrainerControl, TrainerState from 
transformers.trainer_pt_utils import remove_dummy_checkpoint - +from transformers.trainer_utils import PREFIX_CHECKPOINT_DIR +from transformers.utils import SAFE_WEIGHTS_NAME, WEIGHTS_NAME from trl import PPOTrainer from trl.core import PPODecorators, logprobs_from_logits -from ...extras.callbacks import LogCallback, FixValueHeadModelCallback +from ...extras.callbacks import FixValueHeadModelCallback, LogCallback from ...extras.logging import get_logger from ...extras.misc import AverageMeter, count_parameters, get_logits_processor -from .utils import dump_layernorm, get_rewards_from_server, restore_layernorm, replace_model +from .utils import dump_layernorm, get_rewards_from_server, replace_model, restore_layernorm + if TYPE_CHECKING: from transformers import Seq2SeqTrainingArguments, TrainerCallback from trl import AutoModelForCausalLMWithValueHead - from ...hparams import ModelArguments, FinetuningArguments, GeneratingArguments + + from ...hparams import FinetuningArguments, GeneratingArguments, ModelArguments logger = get_logger(__name__) @@ -40,7 +41,7 @@ class CustomPPOTrainer(PPOTrainer, Trainer): generating_args: "GeneratingArguments", callbacks: List["TrainerCallback"], reward_model: "AutoModelForCausalLMWithValueHead", - **kwargs + **kwargs, ): PPOTrainer.__init__(self, **kwargs) @@ -52,7 +53,7 @@ class CustomPPOTrainer(PPOTrainer, Trainer): self.generation_config = GenerationConfig( pad_token_id=self.tokenizer.pad_token_id, eos_token_id=[self.tokenizer.eos_token_id] + self.tokenizer.additional_special_tokens_ids, - **generating_args.to_dict() + **generating_args.to_dict(), ) self.state = TrainerState() @@ -71,7 +72,7 @@ class CustomPPOTrainer(PPOTrainer, Trainer): if not ( getattr(reward_model.pretrained_model, "is_loaded_in_8bit", False) or getattr(reward_model.pretrained_model, "is_loaded_in_4bit", False) - ): # quantized models are already set on the correct device + ): # quantized models are already set on the correct device self.reward_model = self._prepare_deepspeed(self.reward_model) else: self.reward_model = self.accelerator.prepare_model(self.reward_model, evaluation_mode=True) @@ -111,9 +112,11 @@ class CustomPPOTrainer(PPOTrainer, Trainer): logger.info(" Num examples = {}".format(num_examples)) logger.info(" Num Epochs = {}".format(num_train_epochs)) logger.info(" Instantaneous batch size per device = {}".format(self.args.per_device_train_batch_size)) - logger.info(" Total train batch size (w. parallel, buffer, distributed & accumulation) = {}".format( - total_train_batch_size - )) + logger.info( + " Total train batch size (w. 
parallel, buffer, distributed & accumulation) = {}".format( + total_train_batch_size + ) + ) logger.info(" Gradient Accumulation steps = {}".format(self.args.gradient_accumulation_steps)) logger.info(" Num optimization epochs per batch = {}".format(self.finetuning_args.ppo_epochs)) logger.info(" Total training steps = {}".format(max_steps)) @@ -138,10 +141,12 @@ class CustomPPOTrainer(PPOTrainer, Trainer): self.model.eval() # Get inputs - self.tokenizer.padding_side = "right" # change padding side + self.tokenizer.padding_side = "right" # change padding side queries, responses, rewards = [], [], [] for idx in range(0, self.config.batch_size, self.config.mini_batch_size): - mini_batch_queries, mini_batch_responses = self.get_inputs(batch[idx:idx+self.config.mini_batch_size]) + mini_batch_queries, mini_batch_responses = self.get_inputs( + batch[idx : idx + self.config.mini_batch_size] + ) mini_batch_rewards = self.get_rewards(mini_batch_queries, mini_batch_responses, unwrapped_model) queries.extend(mini_batch_queries) responses.extend(mini_batch_responses) @@ -154,7 +159,7 @@ class CustomPPOTrainer(PPOTrainer, Trainer): # Run PPO step stats = self.step(queries, responses, rewards) - self.tokenizer.padding_side = "left" # restore padding side + self.tokenizer.padding_side = "left" # restore padding side loss_meter.update(float(stats["ppo/loss/total"]), n=len(rewards)) reward_meter.update(torch.stack(rewards).mean().item(), n=len(rewards)) @@ -163,18 +168,18 @@ class CustomPPOTrainer(PPOTrainer, Trainer): batch["query"] = self.tokenizer.batch_decode(queries, skip_special_tokens=True) batch["response"] = self.tokenizer.batch_decode(responses, skip_special_tokens=True) self.log_stats(stats, batch, rewards) - except: + except Exception: logger.warning("Failed to save stats due to unknown errors.") self.state.global_step += 1 self.log_callback.on_step_end(self.args, self.state, self.control) - if self.is_local_process_zero() and (step+1) % self.args.logging_steps == 0: + if self.is_local_process_zero() and (step + 1) % self.args.logging_steps == 0: logs = dict( loss=round(loss_meter.avg, 4), reward=round(reward_meter.avg, 4), learning_rate=stats["ppo/learning_rate"], - epoch=round(step / steps_in_epoch, 2) + epoch=round(step / steps_in_epoch, 2), ) tqdm.write(str(logs)) logs["step"] = step @@ -183,10 +188,10 @@ class CustomPPOTrainer(PPOTrainer, Trainer): loss_meter.reset() reward_meter.reset() - if (step+1) % self.args.save_steps == 0: # save checkpoint - self.save_model(os.path.join( - self.args.output_dir, "{}-{}".format(PREFIX_CHECKPOINT_DIR, self.state.global_step) - )) + if (step + 1) % self.args.save_steps == 0: # save checkpoint + self.save_model( + os.path.join(self.args.output_dir, "{}-{}".format(PREFIX_CHECKPOINT_DIR, self.state.global_step)) + ) self.save_callback.on_save( self.args, self.state, self.control, model=self.accelerator.unwrap_model(self.model) ) @@ -207,35 +212,33 @@ class CustomPPOTrainer(PPOTrainer, Trainer): if self.model_args.upcast_layernorm: layernorm_params = dump_layernorm(self.model) - if batch["input_ids"].size(0) == 1: # handle llama2 ppo with gradient accumulation > 1 + if batch["input_ids"].size(0) == 1: # handle llama2 ppo with gradient accumulation > 1 start_index = (batch["input_ids"][0] != self.tokenizer.pad_token_id).nonzero()[0].item() for k, v in batch.items(): batch[k] = v[:, start_index:] unwrapped_model: "AutoModelForCausalLMWithValueHead" = self.accelerator.unwrap_model(self.model) generate_output: torch.Tensor = unwrapped_model.generate( - 
generation_config=self.generation_config, - logits_processor=get_logits_processor(), - **batch + generation_config=self.generation_config, logits_processor=get_logits_processor(), **batch ) if self.model_args.upcast_layernorm: restore_layernorm(self.model, layernorm_params) query = batch["input_ids"].detach().cpu() - response = generate_output[:, batch["input_ids"].size(-1):].detach().cpu() + response = generate_output[:, batch["input_ids"].size(-1) :].detach().cpu() queries, responses = [], [] for i in range(len(query)): query_start_index = (query[i] != self.tokenizer.pad_token_id).nonzero()[0].item() response_index = (response[i] != self.tokenizer.pad_token_id).nonzero() if len(response_index) == 0: - response_length = 1 # allow empty response + response_length = 1 # allow empty response else: response_length = response_index[-1].item() + 1 - queries.append(query[i, query_start_index:]) # remove padding from left - responses.append(response[i, :response_length]) # remove padding from right + queries.append(query[i, query_start_index:]) # remove padding from left + responses.append(response[i, :response_length]) # remove padding from right return queries, responses @@ -244,7 +247,7 @@ class CustomPPOTrainer(PPOTrainer, Trainer): self, queries: List[torch.Tensor], responses: List[torch.Tensor], - unwrapped_model: "AutoModelForCausalLMWithValueHead" + unwrapped_model: "AutoModelForCausalLMWithValueHead", ) -> List[torch.Tensor]: r""" Computes scores using given reward model. @@ -264,17 +267,17 @@ class CustomPPOTrainer(PPOTrainer, Trainer): batch = self.prepare_model_inputs(queries, responses) - with torch.cuda.amp.autocast(dtype=self.model_args.compute_dtype): # support bf16 + with torch.cuda.amp.autocast(dtype=self.model_args.compute_dtype): # support bf16 _, _, values = reward_model(**batch, output_hidden_states=True, return_dict=True) - if getattr(unwrapped_model.config, "model_type", None) == "chatglm": # assume same architecture + if getattr(unwrapped_model.config, "model_type", None) == "chatglm": # assume same architecture values = torch.transpose(values, 0, 1) rewards = [] for i in range(values.size(0)): end_indexes = (batch["input_ids"][i] != self.tokenizer.pad_token_id).nonzero() end_index = end_indexes[-1].item() if len(end_indexes) else 0 - rewards.append(values[i, end_index].float().detach().cpu()) # use fp32 type + rewards.append(values[i, end_index].float().detach().cpu()) # use fp32 type if self.finetuning_args.reward_model_type == "lora": replace_model(unwrapped_model, target="default") @@ -289,7 +292,7 @@ class CustomPPOTrainer(PPOTrainer, Trainer): responses: torch.Tensor, model_inputs: dict, return_logits: Optional[bool] = False, - response_masks: Optional[torch.Tensor] = None + response_masks: Optional[torch.Tensor] = None, ): r""" Calculates model outputs in multiple batches. 
@@ -312,7 +315,7 @@ class CustomPPOTrainer(PPOTrainer, Trainer): input_ids = input_kwargs["input_ids"] attention_mask = input_kwargs["attention_mask"] - with torch.cuda.amp.autocast(dtype=self.model_args.compute_dtype): # support bf16 + with torch.cuda.amp.autocast(dtype=self.model_args.compute_dtype): # support bf16 logits, _, values = model(**input_kwargs) unwrapped_model: "AutoModelForCausalLMWithValueHead" = self.accelerator.unwrap_model(self.model) @@ -325,14 +328,12 @@ class CustomPPOTrainer(PPOTrainer, Trainer): for j in range(len(query_batch)): start = len(query_batch[j]) - 1 - if attention_mask[j, 0] == 0: # offset left padding + if attention_mask[j, 0] == 0: # offset left padding start += attention_mask[j, :].nonzero()[0].item() end = start + len(response_batch[j]) if response_masks is not None: - response_masks_batch = torch.cat( - (torch.zeros_like(query_batch[j]), response_masks_batch[j]) - )[1:] + response_masks_batch = torch.cat((torch.zeros_like(query_batch[j]), response_masks_batch[j]))[1:] masks[j, :start] = 0 masks[j, end:] = 0 diff --git a/src/llmtuner/train/ppo/utils.py b/src/llmtuner/train/ppo/utils.py index 44e62067..f3cd8236 100644 --- a/src/llmtuner/train/ppo/utils.py +++ b/src/llmtuner/train/ppo/utils.py @@ -1,9 +1,11 @@ import json -import torch from typing import TYPE_CHECKING, Dict, List, Literal, Optional +import torch + from ...extras.packages import is_requests_available + if TYPE_CHECKING: from transformers import PreTrainedModel from trl import AutoModelForCausalLMWithValueHead @@ -21,16 +23,18 @@ def get_rewards_from_server(server_url: str, messages: List[str]) -> List[torch. def replace_model(model: "AutoModelForCausalLMWithValueHead", target: Literal["default", "reward"]) -> None: - if target == "reward": # save default head temporarily + if target == "reward": # save default head temporarily valuehead_state_dict: Dict[str, torch.Tensor] = model.v_head.state_dict() setattr(model, "default_head_weight", valuehead_state_dict["summary.weight"].detach().clone()) setattr(model, "default_head_bias", valuehead_state_dict["summary.bias"].detach().clone()) - model.pretrained_model.set_adapter(target) # set the LoRA adapter to be active - model.v_head.load_state_dict({ - "summary.weight": model.get_buffer("{}_head_weight".format(target)).detach().clone(), - "summary.bias": model.get_buffer("{}_head_bias".format(target)).detach().clone() - }) + model.pretrained_model.set_adapter(target) # set the LoRA adapter to be active + model.v_head.load_state_dict( + { + "summary.weight": model.get_buffer("{}_head_weight".format(target)).detach().clone(), + "summary.bias": model.get_buffer("{}_head_bias".format(target)).detach().clone(), + } + ) def dump_layernorm(model: "PreTrainedModel") -> Dict[str, torch.Tensor]: diff --git a/src/llmtuner/train/ppo/workflow.py b/src/llmtuner/train/ppo/workflow.py index 19df2d3d..119739d2 100644 --- a/src/llmtuner/train/ppo/workflow.py +++ b/src/llmtuner/train/ppo/workflow.py @@ -1,23 +1,26 @@ # Inspired by: https://github.com/lvwerra/trl/blob/main/examples/research_projects/stack_llama/scripts/rl_training.py import math -from trl import PPOConfig +from typing import TYPE_CHECKING, List, Optional + from torch.optim import AdamW -from typing import TYPE_CHECKING, Optional, List from transformers import DataCollatorWithPadding from transformers.optimization import get_scheduler +from trl import PPOConfig from ...data import get_dataset from ...extras.callbacks import FixValueHeadModelCallback from ...extras.misc import 
fix_valuehead_checkpoint from ...extras.ploting import plot_loss from ...model import load_model_and_tokenizer -from ...train.utils import create_ref_model, create_reward_model from ...train.ppo.trainer import CustomPPOTrainer +from ...train.utils import create_ref_model, create_reward_model + if TYPE_CHECKING: from transformers import Seq2SeqTrainingArguments, TrainerCallback - from ...hparams import ModelArguments, DataArguments, FinetuningArguments, GeneratingArguments + + from ...hparams import DataArguments, FinetuningArguments, GeneratingArguments, ModelArguments def run_ppo( @@ -26,12 +29,14 @@ def run_ppo( training_args: "Seq2SeqTrainingArguments", finetuning_args: "FinetuningArguments", generating_args: "GeneratingArguments", - callbacks: Optional[List["TrainerCallback"]] = None + callbacks: Optional[List["TrainerCallback"]] = None, ): - model, tokenizer = load_model_and_tokenizer(model_args, finetuning_args, training_args.do_train, add_valuehead=True) + model, tokenizer = load_model_and_tokenizer( + model_args, finetuning_args, training_args.do_train, add_valuehead=True + ) dataset = get_dataset(tokenizer, model_args, data_args, training_args, stage="ppo") - tokenizer.padding_side = "left" # use left-padding in generation while using right-padding in training + tokenizer.padding_side = "left" # use left-padding in generation while using right-padding in training data_collator = DataCollatorWithPadding(tokenizer=tokenizer) # Create reference model and reward model @@ -55,7 +60,7 @@ def run_ppo( use_score_scaling=finetuning_args.ppo_score_norm, use_score_norm=finetuning_args.ppo_score_norm, whiten_rewards=finetuning_args.ppo_whiten_rewards, - accelerator_kwargs={"step_scheduler_with_optimizer": False} + accelerator_kwargs={"step_scheduler_with_optimizer": False}, ) # Create optimizer and scheduler @@ -70,7 +75,7 @@ def run_ppo( training_args.lr_scheduler_type, optimizer=optimizer, num_warmup_steps=training_args.get_warmup_steps(num_training_steps), - num_training_steps=num_training_steps + num_training_steps=num_training_steps, ) # Initialize our Trainer @@ -88,7 +93,7 @@ def run_ppo( dataset=dataset, data_collator=data_collator, optimizer=optimizer, - lr_scheduler=lr_scheduler + lr_scheduler=lr_scheduler, ) # Training @@ -97,6 +102,6 @@ def run_ppo( ppo_trainer.save_model() if training_args.should_save: fix_valuehead_checkpoint(model, training_args.output_dir, training_args.save_safetensors) - ppo_trainer.save_state() # must be called after save_model to have a folder + ppo_trainer.save_state() # must be called after save_model to have a folder if ppo_trainer.is_world_process_zero() and finetuning_args.plot_loss: plot_loss(training_args.output_dir, keys=["loss", "reward"]) diff --git a/src/llmtuner/train/pt/workflow.py b/src/llmtuner/train/pt/workflow.py index f6186b25..cb91cf44 100644 --- a/src/llmtuner/train/pt/workflow.py +++ b/src/llmtuner/train/pt/workflow.py @@ -1,7 +1,8 @@ # Inspired by: https://github.com/huggingface/transformers/blob/v4.34.1/examples/pytorch/language-modeling/run_clm.py import math -from typing import TYPE_CHECKING, Optional, List +from typing import TYPE_CHECKING, List, Optional + from transformers import DataCollatorForLanguageModeling, Trainer from ...data import get_dataset, split_dataset @@ -9,9 +10,11 @@ from ...extras.ploting import plot_loss from ...model import load_model_and_tokenizer from ...train.utils import create_modelcard_and_push + if TYPE_CHECKING: from transformers import Seq2SeqTrainingArguments, TrainerCallback - from ...hparams 
import ModelArguments, DataArguments, FinetuningArguments + + from ...hparams import DataArguments, FinetuningArguments, ModelArguments def run_pt( @@ -19,7 +22,7 @@ def run_pt( data_args: "DataArguments", training_args: "Seq2SeqTrainingArguments", finetuning_args: "FinetuningArguments", - callbacks: Optional[List["TrainerCallback"]] = None + callbacks: Optional[List["TrainerCallback"]] = None, ): model, tokenizer = load_model_and_tokenizer(model_args, finetuning_args, training_args.do_train) dataset = get_dataset(tokenizer, model_args, data_args, training_args, stage="pt") @@ -32,7 +35,7 @@ def run_pt( tokenizer=tokenizer, data_collator=data_collator, callbacks=callbacks, - **split_dataset(dataset, data_args, training_args) + **split_dataset(dataset, data_args, training_args), ) # Training diff --git a/src/llmtuner/train/rm/collator.py b/src/llmtuner/train/rm/collator.py index 161f003d..8d5d4ada 100644 --- a/src/llmtuner/train/rm/collator.py +++ b/src/llmtuner/train/rm/collator.py @@ -1,6 +1,7 @@ -import torch from dataclasses import dataclass from typing import Any, Dict, Sequence + +import torch from transformers import DataCollatorWithPadding @@ -20,8 +21,9 @@ class PairwiseDataCollatorWithPadding(DataCollatorWithPadding): features = [ { "input_ids": feature["prompt_ids"] + feature[key], - "attention_mask": [1] * (len(feature["prompt_ids"]) + len(feature[key])) + "attention_mask": [1] * (len(feature["prompt_ids"]) + len(feature[key])), } - for key in ("chosen_ids", "rejected_ids") for feature in features + for key in ("chosen_ids", "rejected_ids") + for feature in features ] return super().__call__(features) diff --git a/src/llmtuner/train/rm/metric.py b/src/llmtuner/train/rm/metric.py index db9c9243..99dc6ab8 100644 --- a/src/llmtuner/train/rm/metric.py +++ b/src/llmtuner/train/rm/metric.py @@ -1,6 +1,7 @@ -import numpy as np from typing import Dict, Sequence, Tuple, Union +import numpy as np + def compute_accuracy(eval_preds: Sequence[Union[np.ndarray, Tuple[np.ndarray]]]) -> Dict[str, float]: preds, _ = eval_preds diff --git a/src/llmtuner/train/rm/trainer.py b/src/llmtuner/train/rm/trainer.py index 909d4373..7d96e7c3 100644 --- a/src/llmtuner/train/rm/trainer.py +++ b/src/llmtuner/train/rm/trainer.py @@ -1,14 +1,16 @@ -import os import json -import torch +import os from typing import TYPE_CHECKING, Dict, List, Optional, Tuple, Union + +import torch from transformers import Trainer from ...extras.logging import get_logger + if TYPE_CHECKING: - from transformers.trainer import PredictionOutput from transformers.modeling_utils import PreTrainedModel + from transformers.trainer import PredictionOutput logger = get_logger(__name__) @@ -21,13 +23,10 @@ class PairwiseTrainer(Trainer): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) - self.can_return_loss = True # override property to return eval_loss + self.can_return_loss = True # override property to return eval_loss def compute_loss( - self, - model: "PreTrainedModel", - inputs: Dict[str, torch.Tensor], - return_outputs: Optional[bool] = False + self, model: "PreTrainedModel", inputs: Dict[str, torch.Tensor], return_outputs: Optional[bool] = False ) -> Union[torch.Tensor, Tuple[torch.Tensor, List[torch.Tensor]]]: r""" Computes pairwise loss. The first n examples are chosen and the last n examples are rejected. 
@@ -68,9 +67,9 @@ class PairwiseTrainer(Trainer): assert div_index > 0 chosen_trunc_rewards = chosen_rewards[i, div_index:end_index] rejected_trunc_rewards = rejected_rewards[i, div_index:end_index] - if return_outputs: # use the score on the last token except pad token for inference - chosen_scores.append(chosen_rewards[i, chosen_length-1]) - rejected_scores.append(rejected_rewards[i, rejected_length-1]) + if return_outputs: # use the score on the last token except pad token for inference + chosen_scores.append(chosen_rewards[i, chosen_length - 1]) + rejected_scores.append(rejected_rewards[i, rejected_length - 1]) loss += -torch.nn.functional.logsigmoid(chosen_trunc_rewards - rejected_trunc_rewards).mean() loss = loss / batch_size @@ -80,10 +79,7 @@ class PairwiseTrainer(Trainer): return loss - def save_predictions( - self, - predict_results: "PredictionOutput" - ) -> None: + def save_predictions(self, predict_results: "PredictionOutput") -> None: r""" Saves model predictions to `output_dir`. diff --git a/src/llmtuner/train/rm/workflow.py b/src/llmtuner/train/rm/workflow.py index 97c22f7d..0ec9d9de 100644 --- a/src/llmtuner/train/rm/workflow.py +++ b/src/llmtuner/train/rm/workflow.py @@ -1,6 +1,7 @@ # Inspired by: https://github.com/CarperAI/trlx/blob/main/examples/summarize_rlhf/reward_model/train_reward_model_gptj.py -from typing import TYPE_CHECKING, Optional, List +from typing import TYPE_CHECKING, List, Optional + from transformers import Seq2SeqTrainingArguments from ...data import get_dataset, split_dataset @@ -13,9 +14,11 @@ from ...train.rm.metric import compute_accuracy from ...train.rm.trainer import PairwiseTrainer from ...train.utils import create_modelcard_and_push + if TYPE_CHECKING: from transformers import TrainerCallback - from ...hparams import ModelArguments, DataArguments, FinetuningArguments + + from ...hparams import DataArguments, FinetuningArguments, ModelArguments def run_rm( @@ -23,15 +26,17 @@ def run_rm( data_args: "DataArguments", training_args: "Seq2SeqTrainingArguments", finetuning_args: "FinetuningArguments", - callbacks: Optional[List["TrainerCallback"]] = None + callbacks: Optional[List["TrainerCallback"]] = None, ): - model, tokenizer = load_model_and_tokenizer(model_args, finetuning_args, training_args.do_train, add_valuehead=True) + model, tokenizer = load_model_and_tokenizer( + model_args, finetuning_args, training_args.do_train, add_valuehead=True + ) dataset = get_dataset(tokenizer, model_args, data_args, training_args, stage="rm") data_collator = PairwiseDataCollatorWithPadding(tokenizer, pad_to_multiple_of=8) # Update arguments training_args_dict = training_args.to_dict() - training_args_dict.update(dict(remove_unused_columns=False)) # important for pairwise dataset + training_args_dict.update(dict(remove_unused_columns=False)) # important for pairwise dataset training_args = Seq2SeqTrainingArguments(**training_args_dict) # Initialize our Trainer @@ -42,7 +47,7 @@ def run_rm( data_collator=data_collator, callbacks=callbacks + [FixValueHeadModelCallback()], compute_metrics=compute_accuracy, - **split_dataset(dataset, data_args, training_args) + **split_dataset(dataset, data_args, training_args), ) # Training diff --git a/src/llmtuner/train/sft/metric.py b/src/llmtuner/train/sft/metric.py index 2741c66b..f37eba97 100644 --- a/src/llmtuner/train/sft/metric.py +++ b/src/llmtuner/train/sft/metric.py @@ -1,11 +1,11 @@ -import numpy as np from dataclasses import dataclass from typing import TYPE_CHECKING, Dict, Sequence, Tuple, Union +import numpy as np 
+ from ...extras.constants import IGNORE_INDEX -from ...extras.packages import ( - is_jieba_available, is_nltk_available, is_rouge_available -) +from ...extras.packages import is_jieba_available, is_nltk_available, is_rouge_available + if TYPE_CHECKING: from transformers.tokenization_utils import PreTrainedTokenizer @@ -14,7 +14,7 @@ if is_jieba_available(): import jieba if is_nltk_available(): - from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction + from nltk.translate.bleu_score import SmoothingFunction, sentence_bleu if is_rouge_available(): from rouge_chinese import Rouge diff --git a/src/llmtuner/train/sft/trainer.py b/src/llmtuner/train/sft/trainer.py index c8d9f039..36d09f3a 100644 --- a/src/llmtuner/train/sft/trainer.py +++ b/src/llmtuner/train/sft/trainer.py @@ -1,14 +1,16 @@ -import os import json -import torch -import numpy as np -import torch.nn as nn +import os from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, Union + +import numpy as np +import torch +import torch.nn as nn from transformers import Seq2SeqTrainer from ...extras.constants import IGNORE_INDEX from ...extras.logging import get_logger + if TYPE_CHECKING: from transformers.trainer import PredictionOutput @@ -33,16 +35,16 @@ class CustomSeq2SeqTrainer(Seq2SeqTrainer): Subclass and override to inject custom behavior. """ - labels = inputs["labels"].detach().clone() if "labels" in inputs else None # backup labels + labels = inputs["labels"].detach().clone() if "labels" in inputs else None # backup labels if self.args.predict_with_generate: assert self.tokenizer.padding_side == "left", "This method only accepts left-padded tensor." prompt_len, label_len = inputs["input_ids"].size(-1), inputs["labels"].size(-1) if prompt_len > label_len: inputs["labels"] = self._pad_tensors_to_target_len(inputs["labels"], inputs["input_ids"]) - if label_len > prompt_len: # truncate the labels instead of padding the inputs (llama2 fp16 compatibility) + if label_len > prompt_len: # truncate the labels instead of padding the inputs (llama2 fp16 compatibility) inputs["labels"] = inputs["labels"][:, :prompt_len] - loss, generated_tokens, _ = super().prediction_step( # ignore the returned labels (may be truncated) + loss, generated_tokens, _ = super().prediction_step( # ignore the returned labels (may be truncated) model, inputs, prediction_loss_only=prediction_loss_only, ignore_keys=ignore_keys ) if generated_tokens is not None and self.args.predict_with_generate: @@ -51,23 +53,16 @@ class CustomSeq2SeqTrainer(Seq2SeqTrainer): return loss, generated_tokens, labels - def _pad_tensors_to_target_len( - self, - src_tensor: torch.Tensor, - tgt_tensor: torch.Tensor - ) -> torch.Tensor: + def _pad_tensors_to_target_len(self, src_tensor: torch.Tensor, tgt_tensor: torch.Tensor) -> torch.Tensor: r""" Pads the tensor to the same length as the target tensor. """ assert self.tokenizer.pad_token_id is not None, "Pad token is required." padded_tensor = self.tokenizer.pad_token_id * torch.ones_like(tgt_tensor) - padded_tensor[:, -src_tensor.shape[-1]:] = src_tensor # adopt left-padding - return padded_tensor.contiguous() # in contiguous memory + padded_tensor[:, -src_tensor.shape[-1] :] = src_tensor # adopt left-padding + return padded_tensor.contiguous() # in contiguous memory - def save_predictions( - self, - predict_results: "PredictionOutput" - ) -> None: + def save_predictions(self, predict_results: "PredictionOutput") -> None: r""" Saves model predictions to `output_dir`. 
@@ -79,15 +74,23 @@ class CustomSeq2SeqTrainer(Seq2SeqTrainer): output_prediction_file = os.path.join(self.args.output_dir, "generated_predictions.jsonl") logger.info(f"Saving prediction results to {output_prediction_file}") - labels = np.where(predict_results.label_ids != IGNORE_INDEX, predict_results.label_ids, self.tokenizer.pad_token_id) - preds = np.where(predict_results.predictions != IGNORE_INDEX, predict_results.predictions, self.tokenizer.pad_token_id) + labels = np.where( + predict_results.label_ids != IGNORE_INDEX, predict_results.label_ids, self.tokenizer.pad_token_id + ) + preds = np.where( + predict_results.predictions != IGNORE_INDEX, predict_results.predictions, self.tokenizer.pad_token_id + ) for i in range(len(preds)): pad_len = np.nonzero(preds[i] != self.tokenizer.pad_token_id)[0] if len(pad_len): - preds[i] = np.concatenate((preds[i][pad_len[0]:], preds[i][:pad_len[0]]), axis=-1) # move pad token to last + preds[i] = np.concatenate( + (preds[i][pad_len[0] :], preds[i][: pad_len[0]]), axis=-1 + ) # move pad token to last - decoded_labels = self.tokenizer.batch_decode(labels, skip_special_tokens=True, clean_up_tokenization_spaces=False) + decoded_labels = self.tokenizer.batch_decode( + labels, skip_special_tokens=True, clean_up_tokenization_spaces=False + ) decoded_preds = self.tokenizer.batch_decode(preds, skip_special_tokens=True, clean_up_tokenization_spaces=True) with open(output_prediction_file, "w", encoding="utf-8") as writer: diff --git a/src/llmtuner/train/sft/workflow.py b/src/llmtuner/train/sft/workflow.py index 50287c8f..75b4d3d1 100644 --- a/src/llmtuner/train/sft/workflow.py +++ b/src/llmtuner/train/sft/workflow.py @@ -1,6 +1,7 @@ # Inspired by: https://github.com/huggingface/transformers/blob/v4.34.1/examples/pytorch/summarization/run_summarization.py -from typing import TYPE_CHECKING, Optional, List +from typing import TYPE_CHECKING, List, Optional + from transformers import DataCollatorForSeq2Seq, Seq2SeqTrainingArguments from ...data import get_dataset, split_dataset @@ -15,7 +16,8 @@ from ...train.utils import create_modelcard_and_push if TYPE_CHECKING: from transformers import TrainerCallback - from ...hparams import ModelArguments, DataArguments, FinetuningArguments, GeneratingArguments + + from ...hparams import DataArguments, FinetuningArguments, GeneratingArguments, ModelArguments def run_sft( @@ -24,29 +26,31 @@ def run_sft( training_args: "Seq2SeqTrainingArguments", finetuning_args: "FinetuningArguments", generating_args: "GeneratingArguments", - callbacks: Optional[List["TrainerCallback"]] = None + callbacks: Optional[List["TrainerCallback"]] = None, ): model, tokenizer = load_model_and_tokenizer(model_args, finetuning_args, training_args.do_train) dataset = get_dataset(tokenizer, model_args, data_args, training_args, stage="sft") if training_args.predict_with_generate: - tokenizer.padding_side = "left" # use left-padding in generation + tokenizer.padding_side = "left" # use left-padding in generation if getattr(model, "is_quantized", False) and not training_args.do_train: - setattr(model, "_hf_peft_config_loaded", True) # hack here: make model compatible with prediction + setattr(model, "_hf_peft_config_loaded", True) # hack here: make model compatible with prediction data_collator = DataCollatorForSeq2Seq( tokenizer=tokenizer, - pad_to_multiple_of=8 if tokenizer.padding_side == "right" else None, # for shift short attention - label_pad_token_id=IGNORE_INDEX if data_args.ignore_pad_token_for_loss else tokenizer.pad_token_id + 
pad_to_multiple_of=8 if tokenizer.padding_side == "right" else None, # for shift short attention + label_pad_token_id=IGNORE_INDEX if data_args.ignore_pad_token_for_loss else tokenizer.pad_token_id, ) # Override the decoding parameters of Seq2SeqTrainer training_args_dict = training_args.to_dict() - training_args_dict.update(dict( - generation_max_length=training_args.generation_max_length or data_args.cutoff_len, - generation_num_beams=data_args.eval_num_beams or training_args.generation_num_beams - )) + training_args_dict.update( + dict( + generation_max_length=training_args.generation_max_length or data_args.cutoff_len, + generation_num_beams=data_args.eval_num_beams or training_args.generation_num_beams, + ) + ) training_args = Seq2SeqTrainingArguments(**training_args_dict) # Initialize our Trainer @@ -57,7 +61,7 @@ def run_sft( data_collator=data_collator, callbacks=callbacks, compute_metrics=ComputeMetrics(tokenizer) if training_args.predict_with_generate else None, - **split_dataset(dataset, data_args, training_args) + **split_dataset(dataset, data_args, training_args), ) # Keyword arguments for `model.generate` @@ -79,7 +83,7 @@ def run_sft( # Evaluation if training_args.do_eval: metrics = trainer.evaluate(metric_key_prefix="eval", **gen_kwargs) - if training_args.predict_with_generate: # eval_loss will be wrong if predict_with_generate is enabled + if training_args.predict_with_generate: # eval_loss will be wrong if predict_with_generate is enabled metrics.pop("eval_loss", None) trainer.log_metrics("eval", metrics) trainer.save_metrics("eval", metrics) @@ -87,7 +91,7 @@ def run_sft( # Predict if training_args.do_predict: predict_results = trainer.predict(dataset, metric_key_prefix="predict", **gen_kwargs) - if training_args.predict_with_generate: # predict_loss will be wrong if predict_with_generate is enabled + if training_args.predict_with_generate: # predict_loss will be wrong if predict_with_generate is enabled predict_results.metrics.pop("predict_loss", None) trainer.log_metrics("predict", predict_results.metrics) trainer.save_metrics("predict", predict_results.metrics) diff --git a/src/llmtuner/train/tuner.py b/src/llmtuner/train/tuner.py index 12c0791e..d61d9a01 100644 --- a/src/llmtuner/train/tuner.py +++ b/src/llmtuner/train/tuner.py @@ -1,16 +1,18 @@ -import torch from typing import TYPE_CHECKING, Any, Dict, List, Optional + +import torch from transformers import PreTrainedModel from ..extras.callbacks import LogCallback from ..extras.logging import get_logger -from ..hparams import get_train_args, get_infer_args +from ..hparams import get_infer_args, get_train_args from ..model import load_model_and_tokenizer -from .pt import run_pt -from .sft import run_sft -from .rm import run_rm -from .ppo import run_ppo from .dpo import run_dpo +from .ppo import run_ppo +from .pt import run_pt +from .rm import run_rm +from .sft import run_sft + if TYPE_CHECKING: from transformers import TrainerCallback @@ -64,23 +66,23 @@ def export_model(args: Optional[Dict[str, Any]] = None): model.save_pretrained( save_directory=model_args.export_dir, max_shard_size="{}GB".format(model_args.export_size), - safe_serialization=(not model_args.export_legacy_format) + safe_serialization=(not model_args.export_legacy_format), ) if model_args.export_hub_model_id is not None: model.push_to_hub( model_args.export_hub_model_id, token=model_args.hf_hub_token, max_shard_size="{}GB".format(model_args.export_size), - safe_serialization=(not model_args.export_legacy_format) + safe_serialization=(not 
model_args.export_legacy_format), ) try: - tokenizer.padding_side = "left" # restore padding side + tokenizer.padding_side = "left" # restore padding side tokenizer.init_kwargs["padding_side"] = "left" tokenizer.save_pretrained(model_args.export_dir) if model_args.export_hub_model_id is not None: tokenizer.push_to_hub(model_args.export_hub_model_id, token=model_args.hf_hub_token) - except: + except Exception: logger.warning("Cannot save tokenizer, please copy the files manually.") diff --git a/src/llmtuner/train/utils.py b/src/llmtuner/train/utils.py index 789986e4..e8d2603b 100644 --- a/src/llmtuner/train/utils.py +++ b/src/llmtuner/train/utils.py @@ -1,14 +1,17 @@ -import torch from typing import TYPE_CHECKING, Optional, Union +import torch + from ..extras.logging import get_logger -from ..hparams import ModelArguments, FinetuningArguments +from ..hparams import FinetuningArguments, ModelArguments from ..model import get_modelcard_args, load_model_and_tokenizer, load_valuehead_params + if TYPE_CHECKING: from transformers import Seq2SeqTrainingArguments, Trainer from transformers.modeling_utils import PreTrainedModel from trl import AutoModelForCausalLMWithValueHead + from ..hparams import DataArguments @@ -20,7 +23,7 @@ def create_modelcard_and_push( model_args: "ModelArguments", data_args: "DataArguments", training_args: "Seq2SeqTrainingArguments", - finetuning_args: "FinetuningArguments" + finetuning_args: "FinetuningArguments", ) -> None: if training_args.do_train: if training_args.push_to_hub: @@ -33,9 +36,7 @@ def create_modelcard_and_push( def create_ref_model( - model_args: "ModelArguments", - finetuning_args: "FinetuningArguments", - add_valuehead: Optional[bool] = False + model_args: "ModelArguments", finetuning_args: "FinetuningArguments", add_valuehead: Optional[bool] = False ) -> Union["PreTrainedModel", "AutoModelForCausalLMWithValueHead"]: r""" Creates reference model for PPO/DPO training. Evaluation mode is not supported. @@ -44,11 +45,13 @@ def create_ref_model( """ if finetuning_args.ref_model is not None: ref_model_args_dict = model_args.to_dict() - ref_model_args_dict.update(dict( - model_name_or_path=finetuning_args.ref_model, - adapter_name_or_path=finetuning_args.ref_model_adapters, - quantization_bit=finetuning_args.ref_model_quantization_bit - )) + ref_model_args_dict.update( + dict( + model_name_or_path=finetuning_args.ref_model, + adapter_name_or_path=finetuning_args.ref_model_adapters, + quantization_bit=finetuning_args.ref_model_quantization_bit, + ) + ) ref_model_args = ModelArguments(**ref_model_args_dict) ref_finetuning_args = FinetuningArguments(finetuning_type="lora") ref_model, _ = load_model_and_tokenizer( @@ -68,9 +71,7 @@ def create_ref_model( def create_reward_model( - model: "AutoModelForCausalLMWithValueHead", - model_args: "ModelArguments", - finetuning_args: "FinetuningArguments" + model: "AutoModelForCausalLMWithValueHead", model_args: "ModelArguments", finetuning_args: "FinetuningArguments" ) -> "AutoModelForCausalLMWithValueHead": r""" Creates reward model for PPO training. 
@@ -81,24 +82,30 @@ def create_reward_model( return finetuning_args.reward_model elif finetuning_args.reward_model_type == "lora": model.pretrained_model.load_adapter(finetuning_args.reward_model, "reward") - for name, param in model.named_parameters(): # https://github.com/huggingface/peft/issues/1090 + for name, param in model.named_parameters(): # https://github.com/huggingface/peft/issues/1090 if "default" in name: - param.data = param.data.to(torch.float32) # trainable params should in fp32 + param.data = param.data.to(torch.float32) # trainable params should in fp32 vhead_params = load_valuehead_params(finetuning_args.reward_model, model_args) assert vhead_params is not None, "Reward model is not correctly loaded." model.register_buffer("reward_head_weight", vhead_params["v_head.summary.weight"], persistent=False) model.register_buffer("reward_head_bias", vhead_params["v_head.summary.bias"], persistent=False) - model.register_buffer("default_head_weight", torch.zeros_like(vhead_params["v_head.summary.weight"]), persistent=False) - model.register_buffer("default_head_bias", torch.zeros_like(vhead_params["v_head.summary.bias"]), persistent=False) + model.register_buffer( + "default_head_weight", torch.zeros_like(vhead_params["v_head.summary.weight"]), persistent=False + ) + model.register_buffer( + "default_head_bias", torch.zeros_like(vhead_params["v_head.summary.bias"]), persistent=False + ) logger.info("Loaded adapter weights of reward model from {}".format(finetuning_args.reward_model)) return None else: reward_model_args_dict = model_args.to_dict() - reward_model_args_dict.update(dict( - model_name_or_path=finetuning_args.reward_model, - adapter_name_or_path=finetuning_args.reward_model_adapters, - quantization_bit=finetuning_args.reward_model_quantization_bit - )) + reward_model_args_dict.update( + dict( + model_name_or_path=finetuning_args.reward_model, + adapter_name_or_path=finetuning_args.reward_model_adapters, + quantization_bit=finetuning_args.reward_model_quantization_bit, + ) + ) reward_model_args = ModelArguments(**reward_model_args_dict) reward_finetuning_args = FinetuningArguments(finetuning_type="lora") reward_model, _ = load_model_and_tokenizer( diff --git a/src/llmtuner/webui/chatter.py b/src/llmtuner/webui/chatter.py index e211bb2a..e16328a0 100644 --- a/src/llmtuner/webui/chatter.py +++ b/src/llmtuner/webui/chatter.py @@ -1,24 +1,22 @@ -import gradio as gr -from gradio.components import Component # cannot use TYPE_CHECKING here from typing import TYPE_CHECKING, Any, Dict, Generator, List, Optional, Tuple +import gradio as gr +from gradio.components import Component # cannot use TYPE_CHECKING here + from ..chat import ChatModel from ..extras.misc import torch_gc from ..hparams import GeneratingArguments from .common import get_save_dir from .locales import ALERTS + if TYPE_CHECKING: from .manager import Manager class WebChatModel(ChatModel): - def __init__( - self, - manager: "Manager", - demo_mode: Optional[bool] = False, - lazy_init: Optional[bool] = True + self, manager: "Manager", demo_mode: Optional[bool] = False, lazy_init: Optional[bool] = True ) -> None: self.manager = manager self.demo_mode = demo_mode @@ -26,11 +24,12 @@ class WebChatModel(ChatModel): self.tokenizer = None self.generating_args = GeneratingArguments() - if not lazy_init: # read arguments from command line + if not lazy_init: # read arguments from command line super().__init__() - if demo_mode: # load demo_config.json if exists + if demo_mode: # load demo_config.json if exists import json + 
try: with open("demo_config.json", "r", encoding="utf-8") as f: args = json.load(f) @@ -38,7 +37,7 @@ class WebChatModel(ChatModel): super().__init__(args) except AssertionError: print("Please provided model name and template in `demo_config.json`.") - except: + except Exception: print("Cannot find `demo_config.json` at current directory.") @property @@ -64,9 +63,12 @@ class WebChatModel(ChatModel): return if get("top.adapter_path"): - adapter_name_or_path = ",".join([ - get_save_dir(get("top.model_name"), get("top.finetuning_type"), adapter) - for adapter in get("top.adapter_path")]) + adapter_name_or_path = ",".join( + [ + get_save_dir(get("top.model_name"), get("top.finetuning_type"), adapter) + for adapter in get("top.adapter_path") + ] + ) else: adapter_name_or_path = None @@ -79,7 +81,7 @@ class WebChatModel(ChatModel): template=get("top.template"), flash_attn=(get("top.booster") == "flash_attn"), use_unsloth=(get("top.booster") == "unsloth"), - rope_scaling=get("top.rope_scaling") if get("top.rope_scaling") in ["linear", "dynamic"] else None + rope_scaling=get("top.rope_scaling") if get("top.rope_scaling") in ["linear", "dynamic"] else None, ) super().__init__(args) @@ -108,7 +110,7 @@ class WebChatModel(ChatModel): tools: str, max_new_tokens: int, top_p: float, - temperature: float + temperature: float, ) -> Generator[Tuple[List[Tuple[str, str]], List[Tuple[str, str]]], None, None]: chatbot.append([query, ""]) response = "" diff --git a/src/llmtuner/webui/common.py b/src/llmtuner/webui/common.py index 3d431aeb..3a968110 100644 --- a/src/llmtuner/webui/common.py +++ b/src/llmtuner/webui/common.py @@ -1,9 +1,10 @@ -import os import json -import gradio as gr +import os from collections import defaultdict from typing import Any, Dict, Optional -from peft.utils import WEIGHTS_NAME, SAFETENSORS_WEIGHTS_NAME + +import gradio as gr +from peft.utils import SAFETENSORS_WEIGHTS_NAME, WEIGHTS_NAME from ..extras.constants import ( DATA_CONFIG, @@ -12,7 +13,7 @@ from ..extras.constants import ( PEFT_METHODS, SUPPORTED_MODELS, TRAINING_STAGES, - DownloadSource + DownloadSource, ) from ..extras.misc import use_modelscope @@ -36,7 +37,7 @@ def load_config() -> Dict[str, Any]: try: with open(get_config_path(), "r", encoding="utf-8") as f: return json.load(f) - except: + except Exception: return {"lang": None, "last_model": None, "path_dict": {}, "cache_dir": None} @@ -59,7 +60,7 @@ def get_model_path(model_name: str) -> str: use_modelscope() and path_dict.get(DownloadSource.MODELSCOPE) and model_path == path_dict.get(DownloadSource.DEFAULT) - ): # replace path + ): # replace path model_path = path_dict.get(DownloadSource.MODELSCOPE) return model_path @@ -87,9 +88,8 @@ def list_adapters(model_name: str, finetuning_type: str) -> Dict[str, Any]: save_dir = get_save_dir(model_name, finetuning_type) if save_dir and os.path.isdir(save_dir): for adapter in os.listdir(save_dir): - if ( - os.path.isdir(os.path.join(save_dir, adapter)) - and any([os.path.isfile(os.path.join(save_dir, adapter, name)) for name in ADAPTER_NAMES]) + if os.path.isdir(os.path.join(save_dir, adapter)) and any( + os.path.isfile(os.path.join(save_dir, adapter, name)) for name in ADAPTER_NAMES ): adapters.append(adapter) return gr.update(value=[], choices=adapters, interactive=True) diff --git a/src/llmtuner/webui/components/__init__.py b/src/llmtuner/webui/components/__init__.py index 2e9a87ec..1352af8e 100644 --- a/src/llmtuner/webui/components/__init__.py +++ b/src/llmtuner/webui/components/__init__.py @@ -1,11 +1,16 @@ +from 
.chatbot import create_chat_box +from .eval import create_eval_tab +from .export import create_export_tab +from .infer import create_infer_tab from .top import create_top from .train import create_train_tab -from .eval import create_eval_tab -from .infer import create_infer_tab -from .export import create_export_tab -from .chatbot import create_chat_box __all__ = [ - "create_top", "create_train_tab", "create_eval_tab", "create_infer_tab", "create_export_tab", "create_chat_box" + "create_top", + "create_train_tab", + "create_eval_tab", + "create_infer_tab", + "create_export_tab", + "create_chat_box", ] diff --git a/src/llmtuner/webui/components/chatbot.py b/src/llmtuner/webui/components/chatbot.py index ebc1b71f..554fb686 100644 --- a/src/llmtuner/webui/components/chatbot.py +++ b/src/llmtuner/webui/components/chatbot.py @@ -1,6 +1,7 @@ -import gradio as gr from typing import TYPE_CHECKING, Dict, Optional, Tuple +import gradio as gr + from ..utils import check_json_schema @@ -12,8 +13,7 @@ if TYPE_CHECKING: def create_chat_box( - engine: "Engine", - visible: Optional[bool] = False + engine: "Engine", visible: Optional[bool] = False ) -> Tuple["Block", "Component", "Component", Dict[str, "Component"]]: with gr.Box(visible=visible) as chat_box: chatbot = gr.Chatbot() @@ -38,20 +38,23 @@ def create_chat_box( engine.chatter.predict, [chatbot, query, history, system, tools, max_new_tokens, top_p, temperature], [chatbot, history], - show_progress=True - ).then( - lambda: gr.update(value=""), outputs=[query] - ) + show_progress=True, + ).then(lambda: gr.update(value=""), outputs=[query]) clear_btn.click(lambda: ([], []), outputs=[chatbot, history], show_progress=True) - return chat_box, chatbot, history, dict( - system=system, - tools=tools, - query=query, - submit_btn=submit_btn, - clear_btn=clear_btn, - max_new_tokens=max_new_tokens, - top_p=top_p, - temperature=temperature + return ( + chat_box, + chatbot, + history, + dict( + system=system, + tools=tools, + query=query, + submit_btn=submit_btn, + clear_btn=clear_btn, + max_new_tokens=max_new_tokens, + top_p=top_p, + temperature=temperature, + ), ) diff --git a/src/llmtuner/webui/components/data.py b/src/llmtuner/webui/components/data.py index 3a50065a..c63b6ea5 100644 --- a/src/llmtuner/webui/components/data.py +++ b/src/llmtuner/webui/components/data.py @@ -1,10 +1,12 @@ -import os import json -import gradio as gr +import os from typing import TYPE_CHECKING, Any, Dict, Tuple +import gradio as gr + from ...extras.constants import DATA_CONFIG + if TYPE_CHECKING: from gradio.components import Component @@ -24,7 +26,7 @@ def can_preview(dataset_dir: str, dataset: list) -> Dict[str, Any]: try: with open(os.path.join(dataset_dir, DATA_CONFIG), "r", encoding="utf-8") as f: dataset_info = json.load(f) - except: + except Exception: return gr.update(interactive=False) if ( @@ -48,7 +50,7 @@ def get_preview(dataset_dir: str, dataset: list, page_index: int) -> Tuple[int, elif data_file.endswith(".jsonl"): data = [json.loads(line) for line in f] else: - data = [line for line in f] + data = [line for line in f] # noqa: C416 return len(data), data[PAGE_SIZE * page_index : PAGE_SIZE * (page_index + 1)], gr.update(visible=True) @@ -67,32 +69,17 @@ def create_preview_box(dataset_dir: "gr.Textbox", dataset: "gr.Dropdown") -> Dic with gr.Row(): preview_samples = gr.JSON(interactive=False) - dataset.change( - can_preview, [dataset_dir, dataset], [data_preview_btn], queue=False - ).then( + dataset.change(can_preview, [dataset_dir, dataset], [data_preview_btn], 
queue=False).then( lambda: 0, outputs=[page_index], queue=False ) data_preview_btn.click( - get_preview, - [dataset_dir, dataset, page_index], - [preview_count, preview_samples, preview_box], - queue=False + get_preview, [dataset_dir, dataset, page_index], [preview_count, preview_samples, preview_box], queue=False ) - prev_btn.click( - prev_page, [page_index], [page_index], queue=False - ).then( - get_preview, - [dataset_dir, dataset, page_index], - [preview_count, preview_samples, preview_box], - queue=False + prev_btn.click(prev_page, [page_index], [page_index], queue=False).then( + get_preview, [dataset_dir, dataset, page_index], [preview_count, preview_samples, preview_box], queue=False ) - next_btn.click( - next_page, [page_index, preview_count], [page_index], queue=False - ).then( - get_preview, - [dataset_dir, dataset, page_index], - [preview_count, preview_samples, preview_box], - queue=False + next_btn.click(next_page, [page_index, preview_count], [page_index], queue=False).then( + get_preview, [dataset_dir, dataset, page_index], [preview_count, preview_samples, preview_box], queue=False ) close_btn.click(lambda: gr.update(visible=False), outputs=[preview_box], queue=False) return dict( @@ -102,5 +89,5 @@ def create_preview_box(dataset_dir: "gr.Textbox", dataset: "gr.Dropdown") -> Dic prev_btn=prev_btn, next_btn=next_btn, close_btn=close_btn, - preview_samples=preview_samples + preview_samples=preview_samples, ) diff --git a/src/llmtuner/webui/components/eval.py b/src/llmtuner/webui/components/eval.py index d900ad29..4c35ad8f 100644 --- a/src/llmtuner/webui/components/eval.py +++ b/src/llmtuner/webui/components/eval.py @@ -1,9 +1,11 @@ -import gradio as gr from typing import TYPE_CHECKING, Dict -from ..common import list_dataset, DEFAULT_DATA_DIR +import gradio as gr + +from ..common import DEFAULT_DATA_DIR, list_dataset from .data import create_preview_box + if TYPE_CHECKING: from gradio.components import Component @@ -31,9 +33,7 @@ def create_eval_tab(engine: "Engine") -> Dict[str, "Component"]: predict = gr.Checkbox(value=True) input_elems.update({cutoff_len, max_samples, batch_size, predict}) - elem_dict.update(dict( - cutoff_len=cutoff_len, max_samples=max_samples, batch_size=batch_size, predict=predict - )) + elem_dict.update(dict(cutoff_len=cutoff_len, max_samples=max_samples, batch_size=batch_size, predict=predict)) with gr.Row(): max_new_tokens = gr.Slider(10, 2048, value=128, step=1) @@ -42,9 +42,7 @@ def create_eval_tab(engine: "Engine") -> Dict[str, "Component"]: output_dir = gr.Textbox() input_elems.update({max_new_tokens, top_p, temperature, output_dir}) - elem_dict.update(dict( - max_new_tokens=max_new_tokens, top_p=top_p, temperature=temperature, output_dir=output_dir - )) + elem_dict.update(dict(max_new_tokens=max_new_tokens, top_p=top_p, temperature=temperature, output_dir=output_dir)) with gr.Row(): cmd_preview_btn = gr.Button() @@ -59,10 +57,16 @@ def create_eval_tab(engine: "Engine") -> Dict[str, "Component"]: output_box = gr.Markdown() output_elems = [output_box, process_bar] - elem_dict.update(dict( - cmd_preview_btn=cmd_preview_btn, start_btn=start_btn, stop_btn=stop_btn, - resume_btn=resume_btn, process_bar=process_bar, output_box=output_box - )) + elem_dict.update( + dict( + cmd_preview_btn=cmd_preview_btn, + start_btn=start_btn, + stop_btn=stop_btn, + resume_btn=resume_btn, + process_bar=process_bar, + output_box=output_box, + ) + ) cmd_preview_btn.click(engine.runner.preview_eval, input_elems, output_elems) start_btn.click(engine.runner.run_eval, 
input_elems, output_elems) diff --git a/src/llmtuner/webui/components/export.py b/src/llmtuner/webui/components/export.py index 187cda64..c217c620 100644 --- a/src/llmtuner/webui/components/export.py +++ b/src/llmtuner/webui/components/export.py @@ -1,10 +1,12 @@ -import gradio as gr from typing import TYPE_CHECKING, Dict, Generator, List +import gradio as gr + from ...train import export_model from ..common import get_save_dir from ..locales import ALERTS + if TYPE_CHECKING: from gradio.components import Component @@ -24,7 +26,7 @@ def save_model( max_shard_size: int, export_quantization_bit: int, export_quantization_dataset: str, - export_dir: str + export_dir: str, ) -> Generator[str, None, None]: error = "" if not model_name: @@ -44,7 +46,9 @@ def save_model( return if adapter_path: - adapter_name_or_path = ",".join([get_save_dir(model_name, finetuning_type, adapter) for adapter in adapter_path]) + adapter_name_or_path = ",".join( + [get_save_dir(model_name, finetuning_type, adapter) for adapter in adapter_path] + ) else: adapter_name_or_path = None @@ -56,7 +60,7 @@ def save_model( export_dir=export_dir, export_size=max_shard_size, export_quantization_bit=int(export_quantization_bit) if export_quantization_bit in GPTQ_BITS else None, - export_quantization_dataset=export_quantization_dataset + export_quantization_dataset=export_quantization_dataset, ) yield ALERTS["info_exporting"][lang] @@ -86,9 +90,9 @@ def create_export_tab(engine: "Engine") -> Dict[str, "Component"]: max_shard_size, export_quantization_bit, export_quantization_dataset, - export_dir + export_dir, ], - [info_box] + [info_box], ) return dict( @@ -97,5 +101,5 @@ def create_export_tab(engine: "Engine") -> Dict[str, "Component"]: export_quantization_dataset=export_quantization_dataset, export_dir=export_dir, export_btn=export_btn, - info_box=info_box + info_box=info_box, ) diff --git a/src/llmtuner/webui/components/infer.py b/src/llmtuner/webui/components/infer.py index ba578f10..13a6d8ed 100644 --- a/src/llmtuner/webui/components/infer.py +++ b/src/llmtuner/webui/components/infer.py @@ -1,8 +1,10 @@ -import gradio as gr from typing import TYPE_CHECKING, Dict +import gradio as gr + from .chatbot import create_chat_box + if TYPE_CHECKING: from gradio.components import Component @@ -23,18 +25,12 @@ def create_infer_tab(engine: "Engine") -> Dict[str, "Component"]: chat_box, chatbot, history, chat_elems = create_chat_box(engine, visible=False) elem_dict.update(dict(chat_box=chat_box, **chat_elems)) - load_btn.click( - engine.chatter.load_model, input_elems, [info_box] - ).then( + load_btn.click(engine.chatter.load_model, input_elems, [info_box]).then( lambda: gr.update(visible=engine.chatter.loaded), outputs=[chat_box] ) - unload_btn.click( - engine.chatter.unload_model, input_elems, [info_box] - ).then( + unload_btn.click(engine.chatter.unload_model, input_elems, [info_box]).then( lambda: ([], []), outputs=[chatbot, history] - ).then( - lambda: gr.update(visible=engine.chatter.loaded), outputs=[chat_box] - ) + ).then(lambda: gr.update(visible=engine.chatter.loaded), outputs=[chat_box]) return elem_dict diff --git a/src/llmtuner/webui/components/top.py b/src/llmtuner/webui/components/top.py index b8468186..91bcc0d5 100644 --- a/src/llmtuner/webui/components/top.py +++ b/src/llmtuner/webui/components/top.py @@ -1,11 +1,13 @@ -import gradio as gr from typing import TYPE_CHECKING, Dict +import gradio as gr + from ...data import templates from ...extras.constants import METHODS, SUPPORTED_MODELS from ..common import 
get_model_path, get_template, list_adapters, save_config from ..utils import can_quantize + if TYPE_CHECKING: from gradio.components import Component @@ -30,25 +32,19 @@ def create_top() -> Dict[str, "Component"]: rope_scaling = gr.Radio(choices=["none", "linear", "dynamic"], value="none") booster = gr.Radio(choices=["none", "flash_attn", "unsloth"], value="none") - model_name.change( - list_adapters, [model_name, finetuning_type], [adapter_path], queue=False - ).then( + model_name.change(list_adapters, [model_name, finetuning_type], [adapter_path], queue=False).then( get_model_path, [model_name], [model_path], queue=False ).then( get_template, [model_name], [template], queue=False - ) # do not save config since the below line will save + ) # do not save config since the below line will save model_path.change(save_config, inputs=[lang, model_name, model_path], queue=False) - finetuning_type.change( - list_adapters, [model_name, finetuning_type], [adapter_path], queue=False - ).then( + finetuning_type.change(list_adapters, [model_name, finetuning_type], [adapter_path], queue=False).then( can_quantize, [finetuning_type], [quantization_bit], queue=False ) - refresh_btn.click( - list_adapters, [model_name, finetuning_type], [adapter_path], queue=False - ) + refresh_btn.click(list_adapters, [model_name, finetuning_type], [adapter_path], queue=False) return dict( lang=lang, @@ -61,5 +57,5 @@ def create_top() -> Dict[str, "Component"]: quantization_bit=quantization_bit, template=template, rope_scaling=rope_scaling, - booster=booster + booster=booster, ) diff --git a/src/llmtuner/webui/components/train.py b/src/llmtuner/webui/components/train.py index 5689f7ad..d93ff791 100644 --- a/src/llmtuner/webui/components/train.py +++ b/src/llmtuner/webui/components/train.py @@ -1,12 +1,14 @@ -import gradio as gr from typing import TYPE_CHECKING, Dict + +import gradio as gr from transformers.trainer_utils import SchedulerType from ...extras.constants import TRAINING_STAGES -from ..common import list_adapters, list_dataset, DEFAULT_DATA_DIR +from ..common import DEFAULT_DATA_DIR, list_adapters, list_dataset from ..components.data import create_preview_box from ..utils import gen_plot + if TYPE_CHECKING: from gradio.components import Component @@ -29,9 +31,7 @@ def create_train_tab(engine: "Engine") -> Dict[str, "Component"]: dataset_dir.change(list_dataset, [dataset_dir, training_stage], [dataset], queue=False) input_elems.update({training_stage, dataset_dir, dataset}) - elem_dict.update(dict( - training_stage=training_stage, dataset_dir=dataset_dir, dataset=dataset, **preview_elems - )) + elem_dict.update(dict(training_stage=training_stage, dataset_dir=dataset_dir, dataset=dataset, **preview_elems)) with gr.Row(): cutoff_len = gr.Slider(value=1024, minimum=4, maximum=8192, step=1) @@ -41,25 +41,33 @@ def create_train_tab(engine: "Engine") -> Dict[str, "Component"]: compute_type = gr.Radio(choices=["fp16", "bf16", "fp32"], value="fp16") input_elems.update({cutoff_len, learning_rate, num_train_epochs, max_samples, compute_type}) - elem_dict.update(dict( - cutoff_len=cutoff_len, learning_rate=learning_rate, num_train_epochs=num_train_epochs, - max_samples=max_samples, compute_type=compute_type - )) + elem_dict.update( + dict( + cutoff_len=cutoff_len, + learning_rate=learning_rate, + num_train_epochs=num_train_epochs, + max_samples=max_samples, + compute_type=compute_type, + ) + ) with gr.Row(): batch_size = gr.Slider(value=4, minimum=1, maximum=512, step=1) gradient_accumulation_steps = gr.Slider(value=4, 
minimum=1, maximum=512, step=1) - lr_scheduler_type = gr.Dropdown( - choices=[scheduler.value for scheduler in SchedulerType], value="cosine" - ) + lr_scheduler_type = gr.Dropdown(choices=[scheduler.value for scheduler in SchedulerType], value="cosine") max_grad_norm = gr.Textbox(value="1.0") val_size = gr.Slider(value=0, minimum=0, maximum=1, step=0.001) input_elems.update({batch_size, gradient_accumulation_steps, lr_scheduler_type, max_grad_norm, val_size}) - elem_dict.update(dict( - batch_size=batch_size, gradient_accumulation_steps=gradient_accumulation_steps, - lr_scheduler_type=lr_scheduler_type, max_grad_norm=max_grad_norm, val_size=val_size - )) + elem_dict.update( + dict( + batch_size=batch_size, + gradient_accumulation_steps=gradient_accumulation_steps, + lr_scheduler_type=lr_scheduler_type, + max_grad_norm=max_grad_norm, + val_size=val_size, + ) + ) with gr.Accordion(label="Extra config", open=False) as extra_tab: with gr.Row(): @@ -73,10 +81,17 @@ def create_train_tab(engine: "Engine") -> Dict[str, "Component"]: upcast_layernorm = gr.Checkbox(value=False) input_elems.update({logging_steps, save_steps, warmup_steps, neftune_alpha, sft_packing, upcast_layernorm}) - elem_dict.update(dict( - extra_tab=extra_tab, logging_steps=logging_steps, save_steps=save_steps, warmup_steps=warmup_steps, - neftune_alpha=neftune_alpha, sft_packing=sft_packing, upcast_layernorm=upcast_layernorm - )) + elem_dict.update( + dict( + extra_tab=extra_tab, + logging_steps=logging_steps, + save_steps=save_steps, + warmup_steps=warmup_steps, + neftune_alpha=neftune_alpha, + sft_packing=sft_packing, + upcast_layernorm=upcast_layernorm, + ) + ) with gr.Accordion(label="LoRA config", open=False) as lora_tab: with gr.Row(): @@ -87,10 +102,16 @@ def create_train_tab(engine: "Engine") -> Dict[str, "Component"]: create_new_adapter = gr.Checkbox(scale=1) input_elems.update({lora_rank, lora_dropout, lora_target, additional_target, create_new_adapter}) - elem_dict.update(dict( - lora_tab=lora_tab, lora_rank=lora_rank, lora_dropout=lora_dropout, lora_target=lora_target, - additional_target=additional_target, create_new_adapter=create_new_adapter - )) + elem_dict.update( + dict( + lora_tab=lora_tab, + lora_rank=lora_rank, + lora_dropout=lora_dropout, + lora_target=lora_target, + additional_target=additional_target, + create_new_adapter=create_new_adapter, + ) + ) with gr.Accordion(label="RLHF config", open=False) as rlhf_tab: with gr.Row(): @@ -103,13 +124,13 @@ def create_train_tab(engine: "Engine") -> Dict[str, "Component"]: list_adapters, [engine.manager.get_elem_by_name("top.model_name"), engine.manager.get_elem_by_name("top.finetuning_type")], [reward_model], - queue=False + queue=False, ) input_elems.update({dpo_beta, dpo_ftx, reward_model}) - elem_dict.update(dict( - rlhf_tab=rlhf_tab, dpo_beta=dpo_beta, dpo_ftx=dpo_ftx, reward_model=reward_model, refresh_btn=refresh_btn - )) + elem_dict.update( + dict(rlhf_tab=rlhf_tab, dpo_beta=dpo_beta, dpo_ftx=dpo_ftx, reward_model=reward_model, refresh_btn=refresh_btn) + ) with gr.Row(): cmd_preview_btn = gr.Button() @@ -139,20 +160,28 @@ def create_train_tab(engine: "Engine") -> Dict[str, "Component"]: stop_btn.click(engine.runner.set_abort, queue=False) resume_btn.change(engine.runner.monitor, outputs=output_elems) - elem_dict.update(dict( - cmd_preview_btn=cmd_preview_btn, start_btn=start_btn, stop_btn=stop_btn, output_dir=output_dir, - resume_btn=resume_btn, process_bar=process_bar, output_box=output_box, loss_viewer=loss_viewer - )) + elem_dict.update( + dict( + 
cmd_preview_btn=cmd_preview_btn, + start_btn=start_btn, + stop_btn=stop_btn, + output_dir=output_dir, + resume_btn=resume_btn, + process_bar=process_bar, + output_box=output_box, + loss_viewer=loss_viewer, + ) + ) output_box.change( gen_plot, [ engine.manager.get_elem_by_name("top.model_name"), engine.manager.get_elem_by_name("top.finetuning_type"), - output_dir + output_dir, ], loss_viewer, - queue=False + queue=False, ) return elem_dict diff --git a/src/llmtuner/webui/engine.py b/src/llmtuner/webui/engine.py index db60b5df..6c87a255 100644 --- a/src/llmtuner/webui/engine.py +++ b/src/llmtuner/webui/engine.py @@ -1,7 +1,8 @@ -import gradio as gr -from gradio.components import Component # cannot use TYPE_CHECKING here from typing import Any, Dict, Generator, Optional +import gradio as gr +from gradio.components import Component # cannot use TYPE_CHECKING here + from .chatter import WebChatModel from .common import get_model_path, list_dataset, load_config from .locales import LOCALES @@ -11,7 +12,6 @@ from .utils import get_time class Engine: - def __init__(self, demo_mode: Optional[bool] = False, pure_chat: Optional[bool] = False) -> None: self.demo_mode = demo_mode self.pure_chat = pure_chat @@ -26,10 +26,7 @@ class Engine: user_config = load_config() if not self.demo_mode else {} lang = user_config.get("lang", None) or "en" - init_dict = { - "top.lang": {"value": lang}, - "infer.chat_box": {"visible": self.chatter.loaded} - } + init_dict = {"top.lang": {"value": lang}, "infer.chat_box": {"visible": self.chatter.loaded}} if not self.pure_chat: init_dict["train.dataset"] = {"choices": list_dataset()["choices"]} @@ -49,13 +46,17 @@ class Engine: else: yield self._form_dict({"eval.resume_btn": {"value": True}}) else: - yield self._form_dict({ - "train.output_dir": {"value": "train_" + get_time()}, - "eval.output_dir": {"value": "eval_" + get_time()}, - }) + yield self._form_dict( + { + "train.output_dir": {"value": "train_" + get_time()}, + "eval.output_dir": {"value": "eval_" + get_time()}, + } + ) def change_lang(self, lang: str) -> Dict[Component, Dict[str, Any]]: return { component: gr.update(**LOCALES[name][lang]) - for elems in self.manager.all_elems.values() for name, component in elems.items() if name in LOCALES + for elems in self.manager.all_elems.values() + for name, component in elems.items() + if name in LOCALES } diff --git a/src/llmtuner/webui/interface.py b/src/llmtuner/webui/interface.py index 39ddca04..b84fa948 100644 --- a/src/llmtuner/webui/interface.py +++ b/src/llmtuner/webui/interface.py @@ -1,21 +1,22 @@ -import gradio as gr from typing import Optional + +import gradio as gr from transformers.utils.versions import require_version +from .common import save_config from .components import ( + create_chat_box, + create_eval_tab, + create_export_tab, + create_infer_tab, create_top, create_train_tab, - create_eval_tab, - create_infer_tab, - create_export_tab, - create_chat_box ) -from .common import save_config from .css import CSS from .engine import Engine -require_version("gradio>=3.38.0,<4.0.0", "To fix: pip install \"gradio>=3.38.0,<4.0.0\"") +require_version("gradio>=3.38.0,<4.0.0", 'To fix: pip install "gradio>=3.38.0,<4.0.0"') def create_ui(demo_mode: Optional[bool] = False) -> gr.Blocks: @@ -23,11 +24,9 @@ def create_ui(demo_mode: Optional[bool] = False) -> gr.Blocks: with gr.Blocks(title="LLaMA Board", css=CSS) as demo: if demo_mode: + gr.HTML("
<h1><center>LLaMA Board: A One-stop Web UI for Getting Started with LLaMA Factory</center></h1>") gr.HTML( - "<h1><center>LLaMA Board: A One-stop Web UI for Getting Started with LLaMA Factory</center></h1>" - ) - gr.HTML( - "<h3><center>Visit <a href=\"https://github.com/hiyouga/LLaMA-Factory\" target=\"_blank\">" + '<h3><center>Visit <a href="https://github.com/hiyouga/LLaMA-Factory" target="_blank">' "LLaMA Factory</a> for details.</center></h3>
" ) gr.DuplicateButton(value="Duplicate Space for private use", elem_classes="duplicate-button") diff --git a/src/llmtuner/webui/locales.py b/src/llmtuner/webui/locales.py index 60778b67..77f5854e 100644 --- a/src/llmtuner/webui/locales.py +++ b/src/llmtuner/webui/locales.py @@ -1,726 +1,220 @@ LOCALES = { - "lang": { - "en": { - "label": "Lang" - }, - "zh": { - "label": "语言" - } - }, - "model_name": { - "en": { - "label": "Model name" - }, - "zh": { - "label": "模型名称" - } - }, + "lang": {"en": {"label": "Lang"}, "zh": {"label": "语言"}}, + "model_name": {"en": {"label": "Model name"}, "zh": {"label": "模型名称"}}, "model_path": { - "en": { - "label": "Model path", - "info": "Path to pretrained model or model identifier from Hugging Face." - }, - "zh": { - "label": "模型路径", - "info": "本地模型的文件路径或 Hugging Face 的模型标识符。" - } - }, - "finetuning_type": { - "en": { - "label": "Finetuning method" - }, - "zh": { - "label": "微调方法" - } - }, - "adapter_path": { - "en": { - "label": "Adapter path" - }, - "zh": { - "label": "适配器路径" - } - }, - "refresh_btn": { - "en": { - "value": "Refresh adapters" - }, - "zh": { - "value": "刷新适配器" - } - }, - "advanced_tab": { - "en": { - "label": "Advanced configurations" - }, - "zh": { - "label": "高级设置" - } + "en": {"label": "Model path", "info": "Path to pretrained model or model identifier from Hugging Face."}, + "zh": {"label": "模型路径", "info": "本地模型的文件路径或 Hugging Face 的模型标识符。"}, }, + "finetuning_type": {"en": {"label": "Finetuning method"}, "zh": {"label": "微调方法"}}, + "adapter_path": {"en": {"label": "Adapter path"}, "zh": {"label": "适配器路径"}}, + "refresh_btn": {"en": {"value": "Refresh adapters"}, "zh": {"value": "刷新适配器"}}, + "advanced_tab": {"en": {"label": "Advanced configurations"}, "zh": {"label": "高级设置"}}, "quantization_bit": { - "en": { - "label": "Quantization bit", - "info": "Enable 4/8-bit model quantization (QLoRA)." - }, - "zh": { - "label": "量化等级", - "info": "启用 4/8 比特模型量化(QLoRA)。" - } + "en": {"label": "Quantization bit", "info": "Enable 4/8-bit model quantization (QLoRA)."}, + "zh": {"label": "量化等级", "info": "启用 4/8 比特模型量化(QLoRA)。"}, }, "template": { - "en": { - "label": "Prompt template", - "info": "The template used in constructing prompts." - }, - "zh": { - "label": "提示模板", - "info": "构建提示词时使用的模板" - } - }, - "rope_scaling": { - "en": { - "label": "RoPE scaling" - }, - "zh": { - "label": "RoPE 插值方法" - } - }, - "booster": { - "en": { - "label": "Booster" - }, - "zh": { - "label": "加速方式" - } + "en": {"label": "Prompt template", "info": "The template used in constructing prompts."}, + "zh": {"label": "提示模板", "info": "构建提示词时使用的模板"}, }, + "rope_scaling": {"en": {"label": "RoPE scaling"}, "zh": {"label": "RoPE 插值方法"}}, + "booster": {"en": {"label": "Booster"}, "zh": {"label": "加速方式"}}, "training_stage": { - "en": { - "label": "Stage", - "info": "The stage to perform in training." - }, - "zh": { - "label": "训练阶段", - "info": "目前采用的训练方式。" - } + "en": {"label": "Stage", "info": "The stage to perform in training."}, + "zh": {"label": "训练阶段", "info": "目前采用的训练方式。"}, }, "dataset_dir": { - "en": { - "label": "Data dir", - "info": "Path to the data directory." 
- }, - "zh": { - "label": "数据路径", - "info": "数据文件夹的路径。" - } - }, - "dataset": { - "en": { - "label": "Dataset" - }, - "zh": { - "label": "数据集" - } - }, - "data_preview_btn": { - "en": { - "value": "Preview dataset" - }, - "zh": { - "value": "预览数据集" - } - }, - "preview_count": { - "en": { - "label": "Count" - }, - "zh": { - "label": "数量" - } - }, - "page_index": { - "en": { - "label": "Page" - }, - "zh": { - "label": "页数" - } - }, - "prev_btn": { - "en": { - "value": "Prev" - }, - "zh": { - "value": "上一页" - } - }, - "next_btn": { - "en": { - "value": "Next" - }, - "zh": { - "value": "下一页" - } - }, - "close_btn": { - "en": { - "value": "Close" - }, - "zh": { - "value": "关闭" - } - }, - "preview_samples": { - "en": { - "label": "Samples" - }, - "zh": { - "label": "样例" - } + "en": {"label": "Data dir", "info": "Path to the data directory."}, + "zh": {"label": "数据路径", "info": "数据文件夹的路径。"}, }, + "dataset": {"en": {"label": "Dataset"}, "zh": {"label": "数据集"}}, + "data_preview_btn": {"en": {"value": "Preview dataset"}, "zh": {"value": "预览数据集"}}, + "preview_count": {"en": {"label": "Count"}, "zh": {"label": "数量"}}, + "page_index": {"en": {"label": "Page"}, "zh": {"label": "页数"}}, + "prev_btn": {"en": {"value": "Prev"}, "zh": {"value": "上一页"}}, + "next_btn": {"en": {"value": "Next"}, "zh": {"value": "下一页"}}, + "close_btn": {"en": {"value": "Close"}, "zh": {"value": "关闭"}}, + "preview_samples": {"en": {"label": "Samples"}, "zh": {"label": "样例"}}, "cutoff_len": { - "en": { - "label": "Cutoff length", - "info": "Max tokens in input sequence." - }, - "zh": { - "label": "截断长度", - "info": "输入序列分词后的最大长度。" - } + "en": {"label": "Cutoff length", "info": "Max tokens in input sequence."}, + "zh": {"label": "截断长度", "info": "输入序列分词后的最大长度。"}, }, "learning_rate": { - "en": { - "label": "Learning rate", - "info": "Initial learning rate for AdamW." - }, - "zh": { - "label": "学习率", - "info": "AdamW 优化器的初始学习率。" - } + "en": {"label": "Learning rate", "info": "Initial learning rate for AdamW."}, + "zh": {"label": "学习率", "info": "AdamW 优化器的初始学习率。"}, }, "num_train_epochs": { - "en": { - "label": "Epochs", - "info": "Total number of training epochs to perform." - }, - "zh": { - "label": "训练轮数", - "info": "需要执行的训练总轮数。" - } + "en": {"label": "Epochs", "info": "Total number of training epochs to perform."}, + "zh": {"label": "训练轮数", "info": "需要执行的训练总轮数。"}, }, "max_samples": { - "en": { - "label": "Max samples", - "info": "Maximum samples per dataset." - }, - "zh": { - "label": "最大样本数", - "info": "每个数据集最多使用的样本数。" - } + "en": {"label": "Max samples", "info": "Maximum samples per dataset."}, + "zh": {"label": "最大样本数", "info": "每个数据集最多使用的样本数。"}, }, "compute_type": { - "en": { - "label": "Compute type", - "info": "Whether to use fp16 or bf16 mixed precision training." - }, - "zh": { - "label": "计算类型", - "info": "是否启用 FP16 或 BF16 混合精度训练。" - } + "en": {"label": "Compute type", "info": "Whether to use fp16 or bf16 mixed precision training."}, + "zh": {"label": "计算类型", "info": "是否启用 FP16 或 BF16 混合精度训练。"}, }, "batch_size": { - "en": { - "label": "Batch size", - "info": "Number of samples to process per GPU." - }, - "zh":{ - "label": "批处理大小", - "info": "每块 GPU 上处理的样本数量。" - } + "en": {"label": "Batch size", "info": "Number of samples to process per GPU."}, + "zh": {"label": "批处理大小", "info": "每块 GPU 上处理的样本数量。"}, }, "gradient_accumulation_steps": { - "en": { - "label": "Gradient accumulation", - "info": "Number of gradient accumulation steps." 
- }, - "zh": { - "label": "梯度累积", - "info": "梯度累积的步数。" - } + "en": {"label": "Gradient accumulation", "info": "Number of gradient accumulation steps."}, + "zh": {"label": "梯度累积", "info": "梯度累积的步数。"}, }, "lr_scheduler_type": { "en": { "label": "LR Scheduler", "info": "Name of learning rate scheduler.", }, - "zh": { - "label": "学习率调节器", - "info": "采用的学习率调节器名称。" - } + "zh": {"label": "学习率调节器", "info": "采用的学习率调节器名称。"}, }, "max_grad_norm": { - "en": { - "label": "Maximum gradient norm", - "info": "Norm for gradient clipping.." - }, - "zh": { - "label": "最大梯度范数", - "info": "用于梯度裁剪的范数。" - } + "en": {"label": "Maximum gradient norm", "info": "Norm for gradient clipping.."}, + "zh": {"label": "最大梯度范数", "info": "用于梯度裁剪的范数。"}, }, "val_size": { - "en": { - "label": "Val size", - "info": "Proportion of data in the dev set." - }, - "zh": { - "label": "验证集比例", - "info": "验证集占全部样本的百分比。" - } - }, - "extra_tab": { - "en": { - "label": "Extra configurations" - }, - "zh": { - "label": "其它参数设置" - } + "en": {"label": "Val size", "info": "Proportion of data in the dev set."}, + "zh": {"label": "验证集比例", "info": "验证集占全部样本的百分比。"}, }, + "extra_tab": {"en": {"label": "Extra configurations"}, "zh": {"label": "其它参数设置"}}, "logging_steps": { - "en": { - "label": "Logging steps", - "info": "Number of steps between two logs." - }, - "zh": { - "label": "日志间隔", - "info": "每两次日志输出间的更新步数。" - } + "en": {"label": "Logging steps", "info": "Number of steps between two logs."}, + "zh": {"label": "日志间隔", "info": "每两次日志输出间的更新步数。"}, }, "save_steps": { - "en": { - "label": "Save steps", - "info": "Number of steps between two checkpoints." - }, - "zh": { - "label": "保存间隔", - "info": "每两次断点保存间的更新步数。" - } + "en": {"label": "Save steps", "info": "Number of steps between two checkpoints."}, + "zh": {"label": "保存间隔", "info": "每两次断点保存间的更新步数。"}, }, "warmup_steps": { - "en": { - "label": "Warmup steps", - "info": "Number of steps used for warmup." - }, - "zh": { - "label": "预热步数", - "info": "学习率预热采用的步数。" - } + "en": {"label": "Warmup steps", "info": "Number of steps used for warmup."}, + "zh": {"label": "预热步数", "info": "学习率预热采用的步数。"}, }, "neftune_alpha": { - "en": { - "label": "NEFTune Alpha", - "info": "Magnitude of noise adding to embedding vectors." - }, - "zh": { - "label": "NEFTune 噪声参数", - "info": "嵌入向量所添加的噪声大小。" - } + "en": {"label": "NEFTune Alpha", "info": "Magnitude of noise adding to embedding vectors."}, + "zh": {"label": "NEFTune 噪声参数", "info": "嵌入向量所添加的噪声大小。"}, }, "sft_packing": { "en": { "label": "Pack sequences", - "info": "Pack sequences into samples of fixed length in supervised fine-tuning." + "info": "Pack sequences into samples of fixed length in supervised fine-tuning.", }, - "zh": { - "label": "序列打包", - "info": "在有监督微调阶段将序列打包为相同长度的样本。" - } + "zh": {"label": "序列打包", "info": "在有监督微调阶段将序列打包为相同长度的样本。"}, }, "upcast_layernorm": { - "en": { - "label": "Upcast LayerNorm", - "info": "Upcast weights of layernorm in float32." - }, - "zh": { - "label": "缩放归一化层", - "info": "将归一化层权重缩放至 32 位精度。" - } - }, - "lora_tab": { - "en": { - "label": "LoRA configurations" - }, - "zh": { - "label": "LoRA 参数设置" - } + "en": {"label": "Upcast LayerNorm", "info": "Upcast weights of layernorm in float32."}, + "zh": {"label": "缩放归一化层", "info": "将归一化层权重缩放至 32 位精度。"}, }, + "lora_tab": {"en": {"label": "LoRA configurations"}, "zh": {"label": "LoRA 参数设置"}}, "lora_rank": { - "en": { - "label": "LoRA rank", - "info": "The rank of LoRA matrices." 
- }, - "zh": { - "label": "LoRA 秩", - "info": "LoRA 矩阵的秩。" - } + "en": {"label": "LoRA rank", "info": "The rank of LoRA matrices."}, + "zh": {"label": "LoRA 秩", "info": "LoRA 矩阵的秩。"}, }, "lora_dropout": { - "en": { - "label": "LoRA Dropout", - "info": "Dropout ratio of LoRA weights." - }, - "zh": { - "label": "LoRA 随机丢弃", - "info": "LoRA 权重随机丢弃的概率。" - } + "en": {"label": "LoRA Dropout", "info": "Dropout ratio of LoRA weights."}, + "zh": {"label": "LoRA 随机丢弃", "info": "LoRA 权重随机丢弃的概率。"}, }, "lora_target": { "en": { "label": "LoRA modules (optional)", - "info": "Name(s) of target modules to apply LoRA. Use commas to separate multiple modules." + "info": "Name(s) of target modules to apply LoRA. Use commas to separate multiple modules.", }, - "zh": { - "label": "LoRA 作用模块(非必填)", - "info": "应用 LoRA 的目标模块名称。使用英文逗号分隔多个名称。" - } + "zh": {"label": "LoRA 作用模块(非必填)", "info": "应用 LoRA 的目标模块名称。使用英文逗号分隔多个名称。"}, }, "additional_target": { "en": { "label": "Additional modules (optional)", - "info": "Name(s) of modules apart from LoRA layers to be set as trainable. Use commas to separate multiple modules." + "info": "Name(s) of modules apart from LoRA layers to be set as trainable. Use commas to separate multiple modules.", }, - "zh": { - "label": "附加模块(非必填)", - "info": "除 LoRA 层以外的可训练模块名称。使用英文逗号分隔多个名称。" - } + "zh": {"label": "附加模块(非必填)", "info": "除 LoRA 层以外的可训练模块名称。使用英文逗号分隔多个名称。"}, }, "create_new_adapter": { "en": { "label": "Create new adapter", - "info": "Whether to create a new adapter with randomly initialized weight or not." + "info": "Whether to create a new adapter with randomly initialized weight or not.", }, - "zh": { - "label": "新建适配器", - "info": "是否创建一个经过随机初始化的新适配器。" - } - }, - "rlhf_tab": { - "en": { - "label": "RLHF configurations" - }, - "zh": { - "label": "RLHF 参数设置" - } + "zh": {"label": "新建适配器", "info": "是否创建一个经过随机初始化的新适配器。"}, }, + "rlhf_tab": {"en": {"label": "RLHF configurations"}, "zh": {"label": "RLHF 参数设置"}}, "dpo_beta": { - "en": { - "label": "DPO beta", - "info": "Value of the beta parameter in the DPO loss." - }, - "zh": { - "label": "DPO beta 参数", - "info": "DPO 损失函数中 beta 超参数大小。" - } + "en": {"label": "DPO beta", "info": "Value of the beta parameter in the DPO loss."}, + "zh": {"label": "DPO beta 参数", "info": "DPO 损失函数中 beta 超参数大小。"}, }, "dpo_ftx": { - "en": { - "label": "DPO-ftx weight", - "info": "The weight of SFT loss in the DPO-ftx." - }, - "zh": { - "label": "DPO-ftx 权重", - "info": "DPO-ftx 中 SFT 损失的权重大小。" - } + "en": {"label": "DPO-ftx weight", "info": "The weight of SFT loss in the DPO-ftx."}, + "zh": {"label": "DPO-ftx 权重", "info": "DPO-ftx 中 SFT 损失的权重大小。"}, }, "reward_model": { "en": { "label": "Reward model", - "info": "Adapter of the reward model for PPO training. (Needs to refresh adapters)" + "info": "Adapter of the reward model for PPO training. 
(Needs to refresh adapters)", }, - "zh": { - "label": "奖励模型", - "info": "PPO 训练中奖励模型的适配器路径。(需要刷新适配器)" - } - }, - "cmd_preview_btn": { - "en": { - "value": "Preview command" - }, - "zh": { - "value": "预览命令" - } - }, - "start_btn": { - "en": { - "value": "Start" - }, - "zh": { - "value": "开始" - } - }, - "stop_btn": { - "en": { - "value": "Abort" - }, - "zh": { - "value": "中断" - } + "zh": {"label": "奖励模型", "info": "PPO 训练中奖励模型的适配器路径。(需要刷新适配器)"}, }, + "cmd_preview_btn": {"en": {"value": "Preview command"}, "zh": {"value": "预览命令"}}, + "start_btn": {"en": {"value": "Start"}, "zh": {"value": "开始"}}, + "stop_btn": {"en": {"value": "Abort"}, "zh": {"value": "中断"}}, "output_dir": { - "en": { - "label": "Output dir", - "info": "Directory for saving results." - }, - "zh": { - "label": "输出目录", - "info": "保存结果的路径。" - } - }, - "output_box": { - "en": { - "value": "Ready." - }, - "zh": { - "value": "准备就绪。" - } - }, - "loss_viewer": { - "en": { - "label": "Loss" - }, - "zh": { - "label": "损失" - } - }, - "predict": { - "en": { - "label": "Save predictions" - }, - "zh": { - "label": "保存预测结果" - } - }, - "load_btn": { - "en": { - "value": "Load model" - }, - "zh": { - "value": "加载模型" - } - }, - "unload_btn": { - "en": { - "value": "Unload model" - }, - "zh": { - "value": "卸载模型" - } - }, - "info_box": { - "en": { - "value": "Model unloaded, please load a model first." - }, - "zh": { - "value": "模型未加载,请先加载模型。" - } - }, - "system": { - "en": { - "placeholder": "System prompt (optional)" - }, - "zh": { - "placeholder": "系统提示词(非必填)" - } - }, - "tools": { - "en": { - "placeholder": "Tools (optional)" - }, - "zh": { - "placeholder": "工具列表(非必填)" - } - }, - "query": { - "en": { - "placeholder": "Input..." - }, - "zh": { - "placeholder": "输入..." - } - }, - "submit_btn": { - "en": { - "value": "Submit" - }, - "zh": { - "value": "提交" - } - }, - "clear_btn": { - "en": { - "value": "Clear history" - }, - "zh": { - "value": "清空历史" - } - }, - "max_length": { - "en": { - "label": "Maximum length" - }, - "zh": { - "label": "最大长度" - } - }, - "max_new_tokens": { - "en": { - "label": "Maximum new tokens" - }, - "zh": { - "label": "最大生成长度" - } - }, - "top_p": { - "en": { - "label": "Top-p" - }, - "zh": { - "label": "Top-p 采样值" - } - }, - "temperature": { - "en": { - "label": "Temperature" - }, - "zh": { - "label": "温度系数" - } + "en": {"label": "Output dir", "info": "Directory for saving results."}, + "zh": {"label": "输出目录", "info": "保存结果的路径。"}, }, + "output_box": {"en": {"value": "Ready."}, "zh": {"value": "准备就绪。"}}, + "loss_viewer": {"en": {"label": "Loss"}, "zh": {"label": "损失"}}, + "predict": {"en": {"label": "Save predictions"}, "zh": {"label": "保存预测结果"}}, + "load_btn": {"en": {"value": "Load model"}, "zh": {"value": "加载模型"}}, + "unload_btn": {"en": {"value": "Unload model"}, "zh": {"value": "卸载模型"}}, + "info_box": {"en": {"value": "Model unloaded, please load a model first."}, "zh": {"value": "模型未加载,请先加载模型。"}}, + "system": {"en": {"placeholder": "System prompt (optional)"}, "zh": {"placeholder": "系统提示词(非必填)"}}, + "tools": {"en": {"placeholder": "Tools (optional)"}, "zh": {"placeholder": "工具列表(非必填)"}}, + "query": {"en": {"placeholder": "Input..."}, "zh": {"placeholder": "输入..."}}, + "submit_btn": {"en": {"value": "Submit"}, "zh": {"value": "提交"}}, + "clear_btn": {"en": {"value": "Clear history"}, "zh": {"value": "清空历史"}}, + "max_length": {"en": {"label": "Maximum length"}, "zh": {"label": "最大长度"}}, + "max_new_tokens": {"en": {"label": "Maximum new tokens"}, "zh": {"label": "最大生成长度"}}, + "top_p": {"en": {"label": "Top-p"}, "zh": 
{"label": "Top-p 采样值"}}, + "temperature": {"en": {"label": "Temperature"}, "zh": {"label": "温度系数"}}, "max_shard_size": { - "en": { - "label": "Max shard size (GB)", - "info": "The maximum size for a model file." - }, - "zh": { - "label": "最大分块大小(GB)", - "info": "单个模型文件的最大大小。" - } + "en": {"label": "Max shard size (GB)", "info": "The maximum size for a model file."}, + "zh": {"label": "最大分块大小(GB)", "info": "单个模型文件的最大大小。"}, }, "export_quantization_bit": { - "en": { - "label": "Export quantization bit.", - "info": "Quantizing the exported model." - }, - "zh": { - "label": "导出量化等级", - "info": "量化导出模型。" - } + "en": {"label": "Export quantization bit.", "info": "Quantizing the exported model."}, + "zh": {"label": "导出量化等级", "info": "量化导出模型。"}, }, "export_quantization_dataset": { - "en": { - "label": "Export quantization dataset.", - "info": "The calibration dataset used for quantization." - }, - "zh": { - "label": "导出量化数据集", - "info": "量化过程中使用的校准数据集。" - } + "en": {"label": "Export quantization dataset.", "info": "The calibration dataset used for quantization."}, + "zh": {"label": "导出量化数据集", "info": "量化过程中使用的校准数据集。"}, }, "export_dir": { - "en": { - "label": "Export dir", - "info": "Directory to save exported model." - }, - "zh": { - "label": "导出目录", - "info": "保存导出模型的文件夹路径。" - } + "en": {"label": "Export dir", "info": "Directory to save exported model."}, + "zh": {"label": "导出目录", "info": "保存导出模型的文件夹路径。"}, }, - "export_btn": { - "en": { - "value": "Export" - }, - "zh": { - "value": "开始导出" - } - } + "export_btn": {"en": {"value": "Export"}, "zh": {"value": "开始导出"}}, } ALERTS = { - "err_conflict": { - "en": "A process is in running, please abort it firstly.", - "zh": "任务已存在,请先中断训练。" - }, - "err_exists": { - "en": "You have loaded a model, please unload it first.", - "zh": "模型已存在,请先卸载模型。" - }, - "err_no_model": { - "en": "Please select a model.", - "zh": "请选择模型。" - }, - "err_no_path": { - "en": "Model not found.", - "zh": "模型未找到。" - }, - "err_no_dataset": { - "en": "Please choose a dataset.", - "zh": "请选择数据集。" - }, - "err_no_adapter": { - "en": "Please select an adapter.", - "zh": "请选择一个适配器。" - }, - "err_no_export_dir": { - "en": "Please provide export dir.", - "zh": "请填写导出目录" - }, - "err_failed": { - "en": "Failed.", - "zh": "训练出错。" - }, + "err_conflict": {"en": "A process is in running, please abort it firstly.", "zh": "任务已存在,请先中断训练。"}, + "err_exists": {"en": "You have loaded a model, please unload it first.", "zh": "模型已存在,请先卸载模型。"}, + "err_no_model": {"en": "Please select a model.", "zh": "请选择模型。"}, + "err_no_path": {"en": "Model not found.", "zh": "模型未找到。"}, + "err_no_dataset": {"en": "Please choose a dataset.", "zh": "请选择数据集。"}, + "err_no_adapter": {"en": "Please select an adapter.", "zh": "请选择一个适配器。"}, + "err_no_export_dir": {"en": "Please provide export dir.", "zh": "请填写导出目录"}, + "err_failed": {"en": "Failed.", "zh": "训练出错。"}, "err_demo": { "en": "Training is unavailable in demo mode, duplicate the space to a private one first.", - "zh": "展示模式不支持训练,请先复制到私人空间。" + "zh": "展示模式不支持训练,请先复制到私人空间。", }, - "err_device_count": { - "en": "Multiple GPUs are not supported yet.", - "zh": "尚不支持多 GPU 训练。" - }, - "info_aborting": { - "en": "Aborted, wait for terminating...", - "zh": "训练中断,正在等待线程结束……" - }, - "info_aborted": { - "en": "Ready.", - "zh": "准备就绪。" - }, - "info_finished": { - "en": "Finished.", - "zh": "训练完毕。" - }, - "info_loading": { - "en": "Loading model...", - "zh": "加载中……" - }, - "info_unloading": { - "en": "Unloading model...", - "zh": "卸载中……" - }, - "info_loaded": { - "en": "Model loaded, now 
you can chat with your model!", - "zh": "模型已加载,可以开始聊天了!" - }, - "info_unloaded": { - "en": "Model unloaded.", - "zh": "模型已卸载。" - }, - "info_exporting": { - "en": "Exporting model...", - "zh": "正在导出模型……" - }, - "info_exported": { - "en": "Model exported.", - "zh": "模型导出完成。" - } + "err_device_count": {"en": "Multiple GPUs are not supported yet.", "zh": "尚不支持多 GPU 训练。"}, + "info_aborting": {"en": "Aborted, wait for terminating...", "zh": "训练中断,正在等待线程结束……"}, + "info_aborted": {"en": "Ready.", "zh": "准备就绪。"}, + "info_finished": {"en": "Finished.", "zh": "训练完毕。"}, + "info_loading": {"en": "Loading model...", "zh": "加载中……"}, + "info_unloading": {"en": "Unloading model...", "zh": "卸载中……"}, + "info_loaded": {"en": "Model loaded, now you can chat with your model!", "zh": "模型已加载,可以开始聊天了!"}, + "info_unloaded": {"en": "Model unloaded.", "zh": "模型已卸载。"}, + "info_exporting": {"en": "Exporting model...", "zh": "正在导出模型……"}, + "info_exported": {"en": "Model exported.", "zh": "模型导出完成。"}, } diff --git a/src/llmtuner/webui/manager.py b/src/llmtuner/webui/manager.py index 18767f0d..51ddf491 100644 --- a/src/llmtuner/webui/manager.py +++ b/src/llmtuner/webui/manager.py @@ -1,11 +1,11 @@ from typing import TYPE_CHECKING, Dict, List, Set + if TYPE_CHECKING: from gradio.components import Component class Manager: - def __init__(self) -> None: self.all_elems: Dict[str, Dict[str, "Component"]] = {} @@ -26,7 +26,7 @@ class Manager: self.all_elems["top"]["quantization_bit"], self.all_elems["top"]["template"], self.all_elems["top"]["rope_scaling"], - self.all_elems["top"]["booster"] + self.all_elems["top"]["booster"], } def list_elems(self) -> List["Component"]: diff --git a/src/llmtuner/webui/runner.py b/src/llmtuner/webui/runner.py index 48c22214..aac8746b 100644 --- a/src/llmtuner/webui/runner.py +++ b/src/llmtuner/webui/runner.py @@ -1,12 +1,12 @@ +import logging import os import time -import logging -import gradio as gr from threading import Thread -from gradio.components import Component # cannot use TYPE_CHECKING here from typing import TYPE_CHECKING, Any, Dict, Generator, Optional, Tuple +import gradio as gr import transformers +from gradio.components import Component # cannot use TYPE_CHECKING here from transformers.trainer import TRAINING_ARGS_NAME from ..extras.callbacks import LogCallback @@ -18,12 +18,12 @@ from .common import get_module, get_save_dir, load_config from .locales import ALERTS from .utils import gen_cmd, get_eval_results, update_process_bar + if TYPE_CHECKING: from .manager import Manager class Runner: - def __init__(self, manager: "Manager", demo_mode: Optional[bool] = False) -> None: self.manager = manager self.demo_mode = demo_mode @@ -90,9 +90,12 @@ class Runner: user_config = load_config() if get("top.adapter_path"): - adapter_name_or_path = ",".join([ - get_save_dir(get("top.model_name"), get("top.finetuning_type"), adapter) - for adapter in get("top.adapter_path")]) + adapter_name_or_path = ",".join( + [ + get_save_dir(get("top.model_name"), get("top.finetuning_type"), adapter) + for adapter in get("top.adapter_path") + ] + ) else: adapter_name_or_path = None @@ -131,12 +134,12 @@ class Runner: create_new_adapter=get("train.create_new_adapter"), output_dir=get_save_dir(get("top.model_name"), get("top.finetuning_type"), get("train.output_dir")), fp16=(get("train.compute_type") == "fp16"), - bf16=(get("train.compute_type") == "bf16") + bf16=(get("train.compute_type") == "bf16"), ) args["disable_tqdm"] = True if TRAINING_STAGES[get("train.training_stage")] in ["rm", "ppo", "dpo"]: - 
args["create_new_adapter"] = (args["quantization_bit"] is None) + args["create_new_adapter"] = args["quantization_bit"] is None if args["stage"] == "ppo": args["reward_model"] = get_save_dir( @@ -161,9 +164,12 @@ class Runner: user_config = load_config() if get("top.adapter_path"): - adapter_name_or_path = ",".join([ - get_save_dir(get("top.model_name"), get("top.finetuning_type"), adapter) - for adapter in get("top.adapter_path")]) + adapter_name_or_path = ",".join( + [ + get_save_dir(get("top.model_name"), get("top.finetuning_type"), adapter) + for adapter in get("top.adapter_path") + ] + ) else: adapter_name_or_path = None @@ -187,7 +193,7 @@ class Runner: max_new_tokens=get("eval.max_new_tokens"), top_p=get("eval.top_p"), temperature=get("eval.temperature"), - output_dir=get_save_dir(get("top.model_name"), get("top.finetuning_type"), get("eval.output_dir")) + output_dir=get_save_dir(get("top.model_name"), get("top.finetuning_type"), get("eval.output_dir")), ) if get("eval.predict"): @@ -197,7 +203,9 @@ class Runner: return args - def _preview(self, data: Dict[Component, Any], do_train: bool) -> Generator[Tuple[str, Dict[str, Any]], None, None]: + def _preview( + self, data: Dict[Component, Any], do_train: bool + ) -> Generator[Tuple[str, Dict[str, Any]], None, None]: error = self._initialize(data, do_train, from_preview=True) if error: gr.Warning(error) @@ -235,9 +243,11 @@ class Runner: get = lambda name: self.running_data[self.manager.get_elem_by_name(name)] self.running = True lang = get("top.lang") - output_dir = get_save_dir(get("top.model_name"), get("top.finetuning_type"), get( - "{}.output_dir".format("train" if self.do_train else "eval") - )) + output_dir = get_save_dir( + get("top.model_name"), + get("top.finetuning_type"), + get("{}.output_dir".format("train" if self.do_train else "eval")), + ) while self.thread.is_alive(): time.sleep(2) diff --git a/src/llmtuner/webui/utils.py b/src/llmtuner/webui/utils.py index 6bd80093..c94fa3e6 100644 --- a/src/llmtuner/webui/utils.py +++ b/src/llmtuner/webui/utils.py @@ -1,13 +1,15 @@ -import os import json -import gradio as gr -from typing import TYPE_CHECKING, Any, Dict +import os from datetime import datetime +from typing import TYPE_CHECKING, Any, Dict + +import gradio as gr from ..extras.packages import is_matplotlib_available from ..extras.ploting import smooth from .common import get_save_dir + if TYPE_CHECKING: from ..extras.callbacks import LogCallback @@ -22,16 +24,13 @@ def update_process_bar(callback: "LogCallback") -> Dict[str, Any]: percentage = round(100 * callback.cur_steps / callback.max_steps, 0) if callback.max_steps != 0 else 100.0 label = "Running {:d}/{:d}: {} < {}".format( - callback.cur_steps, - callback.max_steps, - callback.elapsed_time, - callback.remaining_time + callback.cur_steps, callback.max_steps, callback.elapsed_time, callback.remaining_time ) return gr.update(label=label, value=percentage, visible=True) def get_time() -> str: - return datetime.now().strftime('%Y-%m-%d-%H-%M-%S') + return datetime.now().strftime("%Y-%m-%d-%H-%M-%S") def can_quantize(finetuning_type: str) -> Dict[str, Any]: diff --git a/tests/cal_flops.py b/tests/cal_flops.py index ff0db0a2..35d98254 100644 --- a/tests/cal_flops.py +++ b/tests/cal_flops.py @@ -3,11 +3,12 @@ # Usage: python cal_flops.py --model_name_or_path path_to_model --batch_size 1 --seq_length 512 # Inspired by: https://www.deepspeed.ai/tutorials/flops-profiler/ +from typing import Optional + import fire import torch -from typing import Optional -from 
deepspeed.accelerator import get_accelerator # type: ignore -from deepspeed.profiling.flops_profiler import get_model_profile # type: ignore +from deepspeed.accelerator import get_accelerator # type: ignore +from deepspeed.profiling.flops_profiler import get_model_profile # type: ignore from llmtuner import ChatModel @@ -16,25 +17,13 @@ def calculate_flops( model_name_or_path: str, batch_size: Optional[int] = 1, seq_length: Optional[int] = 256, - flash_attn: Optional[bool] = False + flash_attn: Optional[bool] = False, ): with get_accelerator().device(0): - chat_model = ChatModel(dict( - model_name_or_path=model_name_or_path, - template="vanilla", - flash_attn=flash_attn - )) + chat_model = ChatModel(dict(model_name_or_path=model_name_or_path, template="vanilla", flash_attn=flash_attn)) fake_input = torch.ones((batch_size, seq_length), dtype=torch.long, device=chat_model.model.device) - input_dict = { - "input_ids": fake_input, - "labels": fake_input.clone() - } - flops, macs, params = get_model_profile( - chat_model.model, - kwargs=input_dict, - print_profile=True, - detailed=True - ) + input_dict = {"input_ids": fake_input, "labels": fake_input.clone()} + flops, macs, params = get_model_profile(chat_model.model, kwargs=input_dict, print_profile=True, detailed=True) print("FLOPs:", flops) print("MACs:", macs) print("Params:", params) diff --git a/tests/cal_lr.py b/tests/cal_lr.py index 2e74e3f4..8c5cd909 100644 --- a/tests/cal_lr.py +++ b/tests/cal_lr.py @@ -3,12 +3,13 @@ # Usage: python cal_lr.py --model_name_or_path path_to_model --dataset alpaca_en --cutoff_len 1024 --batch_size 16 # Inspired by: https://github.com/imoneoi/openchat/blob/master/ochat/training_deepspeed/train.py -import fire import math -import torch -from tqdm import tqdm from typing import Optional + +import fire +import torch from torch.utils.data import DataLoader +from tqdm import tqdm from transformers import DataCollatorForSeq2Seq from llmtuner.data import get_dataset @@ -17,8 +18,8 @@ from llmtuner.hparams import get_train_args from llmtuner.model import load_model_and_tokenizer -BASE_LR = 3e-4 # 1.5e-4 for 30B-70B models -BASE_BS = 4_000_000 # from llama paper +BASE_LR = 3e-4 # 1.5e-4 for 30B-70B models +BASE_BS = 4_000_000 # from llama paper def calculate_lr( @@ -26,18 +27,20 @@ def calculate_lr( dataset: str, cutoff_len: int, # i.e. 
maximum input length during training batch_size: int, # total batch size, namely (batch size * gradient accumulation * world size) - is_mistral: bool, # mistral model uses a smaller learning rate, - dataset_dir: Optional[str] = "data" + is_mistral: bool, # mistral model uses a smaller learning rate, + dataset_dir: Optional[str] = "data", ): - model_args, data_args, training_args, finetuning_args, _ = get_train_args(dict( - stage="sft", - model_name_or_path=model_name_or_path, - dataset=dataset, - dataset_dir=dataset_dir, - template="default", - cutoff_len=cutoff_len, - output_dir="dummy_dir" - )) + model_args, data_args, training_args, finetuning_args, _ = get_train_args( + dict( + stage="sft", + model_name_or_path=model_name_or_path, + dataset=dataset, + dataset_dir=dataset_dir, + template="default", + cutoff_len=cutoff_len, + output_dir="dummy_dir", + ) + ) _, tokenizer = load_model_and_tokenizer(model_args, finetuning_args, is_trainable=False, add_valuehead=False) trainset = get_dataset(tokenizer, model_args, data_args, training_args, stage="sft") data_collator = DataCollatorForSeq2Seq(tokenizer=tokenizer, label_pad_token_id=IGNORE_INDEX) @@ -49,14 +52,16 @@ def calculate_lr( valid_tokens += torch.sum(batch["labels"] != IGNORE_INDEX).item() total_tokens += torch.numel(batch["labels"]) - batch_max_len = cutoff_len * batch_size # max tokens in a batch + batch_max_len = cutoff_len * batch_size # max tokens in a batch valid_ratio = valid_tokens / total_tokens batch_valid_len = batch_max_len * valid_ratio - lr = BASE_LR * math.sqrt(batch_valid_len / BASE_BS) # lr ~ sqrt(batch_size) + lr = BASE_LR * math.sqrt(batch_valid_len / BASE_BS) # lr ~ sqrt(batch_size) lr = lr / 6.0 if is_mistral else lr - print("Optimal learning rate is {:.2e} for valid ratio% {:.2f} and effective batch size {:.2f}".format( - lr, valid_ratio * 100, batch_valid_len - )) + print( + "Optimal learning rate is {:.2e} for valid ratio% {:.2f} and effective batch size {:.2f}".format( + lr, valid_ratio * 100, batch_valid_len + ) + ) if __name__ == "__main__": diff --git a/tests/llamafy_baichuan2.py b/tests/llamafy_baichuan2.py index 77c214eb..91666bae 100644 --- a/tests/llamafy_baichuan2.py +++ b/tests/llamafy_baichuan2.py @@ -4,32 +4,28 @@ # Inspired by: https://huggingface.co/fireballoon/baichuan-llama-7b/blob/main/convert_baichuan_to_llama.py # Converted model: https://huggingface.co/hiyouga/Baichuan2-7B-Base-LLaMAfied -import os -import fire import json -import torch -from tqdm import tqdm +import os from collections import OrderedDict -from safetensors.torch import save_file -from transformers.modeling_utils import ( - shard_checkpoint, - SAFE_WEIGHTS_NAME, - SAFE_WEIGHTS_INDEX_NAME, - WEIGHTS_NAME, - WEIGHTS_INDEX_NAME -) from typing import Any, Dict, Optional +import fire +import torch +from safetensors.torch import save_file +from tqdm import tqdm +from transformers.modeling_utils import ( + SAFE_WEIGHTS_INDEX_NAME, + SAFE_WEIGHTS_NAME, + WEIGHTS_INDEX_NAME, + WEIGHTS_NAME, + shard_checkpoint, +) + CONFIG_NAME = "config.json" -def save_weight( - input_dir: str, - output_dir: str, - shard_size: str, - save_safetensors: bool -): +def save_weight(input_dir: str, output_dir: str, shard_size: str, save_safetensors: bool): baichuan2_state_dict: Dict[str, torch.Tensor] = OrderedDict() for filepath in tqdm(os.listdir(input_dir), desc="Load weights"): if os.path.isfile(os.path.join(input_dir, filepath)) and filepath.endswith(".bin"): @@ -41,8 +37,8 @@ def save_weight( if "W_pack" in key: proj_size = value.size(0) // 3 
llama2_state_dict[key.replace("W_pack", "q_proj")] = value[:proj_size, :] - llama2_state_dict[key.replace("W_pack", "k_proj")] = value[proj_size:2*proj_size, :] - llama2_state_dict[key.replace("W_pack", "v_proj")] = value[2*proj_size:, :] + llama2_state_dict[key.replace("W_pack", "k_proj")] = value[proj_size : 2 * proj_size, :] + llama2_state_dict[key.replace("W_pack", "v_proj")] = value[2 * proj_size :, :] elif "lm_head" in key: llama2_state_dict[key] = torch.nn.functional.normalize(value) else: @@ -56,7 +52,7 @@ def save_weight( save_file(shard, os.path.join(output_dir, shard_file), metadata={"format": "pt"}) else: torch.save(shard, os.path.join(output_dir, shard_file)) - + if index is None: print("Model weights saved in {}".format(os.path.join(output_dir, WEIGHTS_NAME))) else: @@ -66,10 +62,7 @@ def save_weight( print("Model weights saved in {}".format(output_dir)) -def save_config( - input_dir: str, - output_dir: str -): +def save_config(input_dir: str, output_dir: str): with open(os.path.join(input_dir, CONFIG_NAME), "r", encoding="utf-8") as f: llama2_config_dict: Dict[str, Any] = json.load(f) @@ -83,19 +76,14 @@ def save_config( print("Model config saved in {}".format(os.path.join(output_dir, CONFIG_NAME))) -def llamafy_baichuan2( - input_dir: str, - output_dir: str, - shard_size: str, - save_safetensors: Optional[bool] = False -): +def llamafy_baichuan2(input_dir: str, output_dir: str, shard_size: str, save_safetensors: Optional[bool] = False): try: os.makedirs(output_dir, exist_ok=False) except Exception as e: raise print("Output dir already exists", e) save_weight(input_dir, output_dir, shard_size, save_safetensors) - save_config(input_dir, output_dir) + save_config(input_dir, output_dir) if __name__ == "__main__": diff --git a/tests/llamafy_internlm2.py b/tests/llamafy_internlm2.py index 8fb1448c..f1c4c9af 100644 --- a/tests/llamafy_internlm2.py +++ b/tests/llamafy_internlm2.py @@ -3,32 +3,28 @@ # Usage: python llamafy_internlm2.py --input_dir input --output_dir output --shard_size 10GB # Warning: We have found that the converted model cannot infer correctly. It will be fixed later. -import os -import fire import json -import torch -from tqdm import tqdm +import os from collections import OrderedDict -from safetensors.torch import save_file -from transformers.modeling_utils import ( - shard_checkpoint, - SAFE_WEIGHTS_NAME, - SAFE_WEIGHTS_INDEX_NAME, - WEIGHTS_NAME, - WEIGHTS_INDEX_NAME -) from typing import Any, Dict, Optional +import fire +import torch +from safetensors.torch import save_file +from tqdm import tqdm +from transformers.modeling_utils import ( + SAFE_WEIGHTS_INDEX_NAME, + SAFE_WEIGHTS_NAME, + WEIGHTS_INDEX_NAME, + WEIGHTS_NAME, + shard_checkpoint, +) + CONFIG_NAME = "config.json" -def save_weight( - input_dir: str, - output_dir: str, - shard_size: str, - save_safetensors: bool -): +def save_weight(input_dir: str, output_dir: str, shard_size: str, save_safetensors: bool): with open(os.path.join(input_dir, CONFIG_NAME), "r", encoding="utf-8") as f: internlm2_config_dict: Dict[str, Any] = json.load(f) @@ -50,8 +46,10 @@ def save_weight( q_size = value.size(0) // (num_q_heads + 2 * num_kv_heads) * num_q_heads kv_size = value.size(0) // (num_q_heads + 2 * num_kv_heads) * num_kv_heads llama2_state_dict[key.replace("attention.wqkv", "self_attn.q_proj")] = value[:q_size, ...] - llama2_state_dict[key.replace("attention.wqkv", "self_attn.k_proj")] = value[q_size:q_size+kv_size, ...] 
- llama2_state_dict[key.replace("attention.wqkv", "self_attn.v_proj")] = value[q_size+kv_size:, ...] + llama2_state_dict[key.replace("attention.wqkv", "self_attn.k_proj")] = value[ + q_size : q_size + kv_size, ... + ] + llama2_state_dict[key.replace("attention.wqkv", "self_attn.v_proj")] = value[q_size + kv_size :, ...] elif "wo" in key: llama2_state_dict[key.replace("attention.wo", "self_attn.o_proj")] = value elif "attention_norm" in key: @@ -85,10 +83,7 @@ def save_weight( print("Model weights saved in {}".format(output_dir)) -def save_config( - input_dir: str, - output_dir: str -): +def save_config(input_dir: str, output_dir: str): with open(os.path.join(input_dir, CONFIG_NAME), "r", encoding="utf-8") as f: llama2_config_dict: Dict[str, Any] = json.load(f) @@ -103,12 +98,7 @@ def save_config( print("Model config saved in {}".format(os.path.join(output_dir, CONFIG_NAME))) -def llamafy_internlm2( - input_dir: str, - output_dir: str, - shard_size: str, - save_safetensors: Optional[bool] = False -): +def llamafy_internlm2(input_dir: str, output_dir: str, shard_size: str, save_safetensors: Optional[bool] = False): try: os.makedirs(output_dir, exist_ok=False) except Exception as e: diff --git a/tests/llamafy_qwen.py b/tests/llamafy_qwen.py index dd34d507..5b66b7ef 100644 --- a/tests/llamafy_qwen.py +++ b/tests/llamafy_qwen.py @@ -3,39 +3,36 @@ # Usage: python llamafy_qwen.py --input_dir input --output_dir output --shard_size 10GB # Converted model: https://huggingface.co/hiyouga/Qwen-14B-Chat-LLaMAfied -import os -import fire import json -import torch -from tqdm import tqdm +import os from collections import OrderedDict +from typing import Any, Dict, Optional + +import fire +import torch from safetensors import safe_open from safetensors.torch import save_file +from tqdm import tqdm from transformers.modeling_utils import ( - shard_checkpoint, - SAFE_WEIGHTS_NAME, SAFE_WEIGHTS_INDEX_NAME, + SAFE_WEIGHTS_NAME, + WEIGHTS_INDEX_NAME, WEIGHTS_NAME, - WEIGHTS_INDEX_NAME + shard_checkpoint, ) from transformers.utils import check_min_version -from typing import Any, Dict, Optional + try: check_min_version("4.34.0") -except: +except Exception: raise ValueError("Please upgrade `transformers` to 4.34.0") CONFIG_NAME = "config.json" -def save_weight( - input_dir: str, - output_dir: str, - shard_size: str, - save_safetensors: bool -) -> str: +def save_weight(input_dir: str, output_dir: str, shard_size: str, save_safetensors: bool) -> str: qwen_state_dict: Dict[str, torch.Tensor] = OrderedDict() for filepath in tqdm(os.listdir(input_dir), desc="Load weights"): if os.path.isfile(os.path.join(input_dir, filepath)) and filepath.endswith(".safetensors"): @@ -57,13 +54,15 @@ def save_weight( if "attn.c_attn" in key: proj_size = value.size(0) // 3 llama2_state_dict[key.replace("attn.c_attn", "self_attn.q_proj")] = value[:proj_size, ...] - llama2_state_dict[key.replace("attn.c_attn", "self_attn.k_proj")] = value[proj_size:2*proj_size, ...] - llama2_state_dict[key.replace("attn.c_attn", "self_attn.v_proj")] = value[2*proj_size:, ...] + llama2_state_dict[key.replace("attn.c_attn", "self_attn.k_proj")] = value[ + proj_size : 2 * proj_size, ... + ] + llama2_state_dict[key.replace("attn.c_attn", "self_attn.v_proj")] = value[2 * proj_size :, ...] 
elif "attn.c_proj" in key: llama2_state_dict[key.replace("attn.c_proj", "self_attn.o_proj")] = value - llama2_state_dict[key.replace("attn.c_proj.weight", "self_attn.o_proj.bias")] = ( - torch.zeros_like(value[:, 0]).squeeze() - ) + llama2_state_dict[key.replace("attn.c_proj.weight", "self_attn.o_proj.bias")] = torch.zeros_like( + value[:, 0] + ).squeeze() elif "ln_1" in key: llama2_state_dict[key.replace("ln_1", "input_layernorm")] = value elif "ln_2" in key: @@ -99,11 +98,7 @@ def save_weight( return str(torch_dtype).replace("torch.", "") -def save_config( - input_dir: str, - output_dir: str, - torch_dtype: str -): +def save_config(input_dir: str, output_dir: str, torch_dtype: str): with open(os.path.join(input_dir, CONFIG_NAME), "r", encoding="utf-8") as f: qwen_config_dict: Dict[str, Any] = json.load(f) @@ -133,12 +128,7 @@ def save_config( print("Model config saved in {}".format(os.path.join(output_dir, CONFIG_NAME))) -def llamafy_qwen( - input_dir: str, - output_dir: str, - shard_size: str, - save_safetensors: Optional[bool] = False -): +def llamafy_qwen(input_dir: str, output_dir: str, shard_size: str, save_safetensors: Optional[bool] = False): try: os.makedirs(output_dir, exist_ok=False) except Exception as e: diff --git a/tests/loftq_init.py b/tests/loftq_init.py index 637b5c97..be7b07c5 100644 --- a/tests/loftq_init.py +++ b/tests/loftq_init.py @@ -4,12 +4,13 @@ # Inspired by: https://github.com/huggingface/peft/blob/main/examples/loftq_finetuning/quantize_save_load.py import os +from typing import TYPE_CHECKING, Optional + import fire import torch import torch.nn as nn -from typing import TYPE_CHECKING, Optional -from transformers import AutoModelForCausalLM, AutoTokenizer from peft import LoftQConfig, LoraConfig, TaskType, get_peft_model +from transformers import AutoModelForCausalLM, AutoTokenizer if TYPE_CHECKING: @@ -17,7 +18,6 @@ if TYPE_CHECKING: class Shell(nn.Module): - def __init__(self, weight: torch.Tensor, bias: Optional[torch.Tensor] = None): super().__init__() self.weight = nn.Parameter(weight, requires_grad=False) @@ -26,7 +26,7 @@ class Shell(nn.Module): def unwrap_model(model: nn.Module, pattern=".base_layer") -> None: - for name in set([k.split(pattern)[0] for k, _ in model.named_modules() if pattern in k]): + for name in set([k.split(pattern)[0] for k, _ in model.named_modules() if pattern in k]): # noqa: C403 parent_name = ".".join(name.split(".")[:-1]) child_name = name.split(".")[-1] parent_module = model.get_submodule(parent_name) @@ -35,7 +35,7 @@ def unwrap_model(model: nn.Module, pattern=".base_layer") -> None: weight = getattr(base_layer, "weight", None) bias = getattr(base_layer, "bias", None) setattr(parent_module, child_name, Shell(weight, bias)) - + print("Model unwrapped.") @@ -60,7 +60,7 @@ def quantize_loftq( lora_dropout=0.1, target_modules=[name.strip() for name in lora_target.split(",")], init_lora_weights="loftq", - loftq_config=loftq_config + loftq_config=loftq_config, ) # Init LoftQ model
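The llamafy_baichuan2.py, llamafy_internlm2.py, and llamafy_qwen.py hunks above all perform the same operation: slicing a fused attention weight (W_pack, wqkv, or c_attn) into separate q/k/v projection weights by rows. A minimal standalone sketch of that slicing is given below; the head counts and dimensions are illustrative assumptions for demonstration only, not values taken from the repository.

import torch

# Assumed grouped-query attention layout: 8 query heads, 2 key/value heads, head_dim 64.
num_q_heads, num_kv_heads, head_dim, hidden_size = 8, 2, 64, 512
fused = torch.randn((num_q_heads + 2 * num_kv_heads) * head_dim, hidden_size)

# Rows are assumed to be laid out as [all q heads | all k heads | all v heads],
# so slicing by row recovers the separate projections, mirroring the W_pack / wqkv /
# c_attn handling in the scripts above.
q_size = num_q_heads * head_dim
kv_size = num_kv_heads * head_dim
q_proj = fused[:q_size, :]
k_proj = fused[q_size : q_size + kv_size, :]
v_proj = fused[q_size + kv_size :, :]

assert q_proj.shape == (q_size, hidden_size)
assert k_proj.shape == (kv_size, hidden_size) and v_proj.shape == (kv_size, hidden_size)

For plain multi-head attention (Baichuan2's W_pack, Qwen's c_attn) the three slices are equal thirds of the fused tensor, which is why those scripts use value.size(0) // 3; the grouped-query layout sketched here corresponds to the InternLM2 wqkv case.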