commit b856ad0fb93eb77bd9b76de11992851f44579556 Author: shengdinghu Date: Mon Feb 14 21:19:03 2022 +0800 first commit diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..e099ba4 --- /dev/null +++ b/.gitignore @@ -0,0 +1,20 @@ +data/ +**/__pycache__/ +logs/* +experiments/logs +!logs/.gitkeep +datasets/* +!datasets/*.sh +.vscode/ +*.egg-info/ +eggs/ +.eggs/ +*.egg +**.egg +build/ +_build/ +**/build/ +outputs/ +log.txt +**/DeltaHub/ +*beans \ No newline at end of file diff --git a/.readthedocs.yaml b/.readthedocs.yaml new file mode 100644 index 0000000..7f43aea --- /dev/null +++ b/.readthedocs.yaml @@ -0,0 +1,29 @@ +# .readthedocs.yaml +# Read the Docs configuration file +# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details + +# Required +version: 1 + +# Set the version of Python and other tools you might need +build: + os: ubuntu-20.04 + tools: + python: "3.9" + # You can also specify other tool versions: + # nodejs: "16" + # rust: "1.55" + # golang: "1.17" + +# Build documentation in the docs/ directory with Sphinx +sphinx: + configuration: docs/conf.py + +# If using Sphinx, optionally build your docs in additional formats such as PDF +# formats: +# - pdf + +# Optionally declare the Python requirements required to build your docs +python: + install: + - requirements: docs/requirements.txt \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000..6d3a9f7 --- /dev/null +++ b/README.md @@ -0,0 +1,94 @@ +
+ + + + +**An Open-Source Framework for Parameter-Efficient Tuning.** + +------ + +

+ Overview • + Installation • + Supported Models • + Docs • + Performance • + + +

+ +
+
+![version](https://img.shields.io/badge/version-v0.1.0-blue)
+
+## Overview
+
+OpenDelta is a toolkit for parameter-efficient tuning methods (which we dub *delta tuning*), by which users can flexibly assign (or add) a small amount of parameters to update while keeping most of the parameters frozen. With OpenDelta, users can easily implement prefix-tuning, adapters, Lora, or any other type of delta tuning with their preferred PTMs.
+
+## Installation
+Create a virtual environment (optional):
+```shell
+conda create -n opendelta_env python=3.8
+conda activate opendelta_env
+```
+
+### Using Pip
+
+Our repo is tested on Python 3.6+ and PyTorch 1.8.1+. Install OpenDelta using pip as follows:
+
+```shell
+pip install opendelta
+```
+
+To play with the latest features, you can also install OpenDelta from the source.
+
+### Build from Source
+
+```shell
+git clone https://github.com/thunlp/OpenDelta.git
+cd OpenDelta
+```
+
+#### Option 1: If you won't modify the code, run
+```shell
+python setup.py install
+```
+
+#### Option 2: If you want to modify the code, run
+```shell
+python setup.py develop
+```
+
+
+
+### Verified Supported Models
+
+**You can try OpenDelta on any backbone model based on PyTorch.** However, there is a small chance that the interface of a backbone model's submodules is not supported. We have therefore verified some commonly used models that OpenDelta is sure to support.
+
+We will keep testing more and more emerging models.
+
+Pull requests are welcome when you successfully apply OpenDelta to your own backbone model.
+
+
+| | Lora | Bias
Tuning | Adapter
 Houlsby | Adapter
 Pfeiffer | Adapter
 Drop | Adapter
 Low-Rank | Compacter | Prefix
Tuning | Prompt
Tuning | +| --------- | ---- | ---- | ---- | ---- | ---- | ---- | ---- | ----- | ----- | +| T5 | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | +| GPT-2 | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | | +| BART | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | | +| DistilBERT | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | | +| RoBERTa | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | | +| BERT | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | +| T5-3b(parallel)| ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | +| Deberta-v2 | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | | | +| CTRL | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | | | +| ViT | ✅ | | | | | | | | | + + +### Performance Checked Combination + +Google sheet [here](https://docs.google.com/spreadsheets/d/1BIVa8ocAPga-u7rBOXLYaTfaJSjI1dWfwohmLjmFDrY/edit?usp=sharing) + + + diff --git a/docs/Makefile b/docs/Makefile new file mode 100644 index 0000000..d0c3cbf --- /dev/null +++ b/docs/Makefile @@ -0,0 +1,20 @@ +# Minimal makefile for Sphinx documentation +# + +# You can set these variables from the command line, and also +# from the environment for the first two. +SPHINXOPTS ?= +SPHINXBUILD ?= sphinx-build +SOURCEDIR = source +BUILDDIR = build + +# Put it first so that "make" without argument is like "make help". +help: + @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) + +.PHONY: help Makefile + +# Catch-all target: route all unknown targets to Sphinx using the new +# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). +%: Makefile + @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) diff --git a/docs/make.bat b/docs/make.bat new file mode 100644 index 0000000..6fcf05b --- /dev/null +++ b/docs/make.bat @@ -0,0 +1,35 @@ +@ECHO OFF + +pushd %~dp0 + +REM Command file for Sphinx documentation + +if "%SPHINXBUILD%" == "" ( + set SPHINXBUILD=sphinx-build +) +set SOURCEDIR=source +set BUILDDIR=build + +if "%1" == "" goto help + +%SPHINXBUILD% >NUL 2>NUL +if errorlevel 9009 ( + echo. + echo.The 'sphinx-build' command was not found. Make sure you have Sphinx + echo.installed, then set the SPHINXBUILD environment variable to point + echo.to the full path of the 'sphinx-build' executable. Alternatively you + echo.may add the Sphinx directory to PATH. + echo. + echo.If you don't have Sphinx installed, grab it from + echo.https://www.sphinx-doc.org/ + exit /b 1 +) + +%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% +goto end + +:help +%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% + +:end +popd diff --git a/docs/readme.md b/docs/readme.md new file mode 100644 index 0000000..a9e75df --- /dev/null +++ b/docs/readme.md @@ -0,0 +1,20 @@ +# OpenDelta Documentation + +To build this doc locally, please firstly install [sphinx](https://www.sphinx-doc.org/en/master/) packages. + +``` +pip install sphinx +pip install sphinx_rtd_theme +pip install sphinx_copybutton +pip install sphinx_toolbox +pip install myst_parser +``` + +Then install opendelta either from source, or from pip. After that, + +``` +cd docs +make html +``` + +Then open the generated `docs/build/html/index.html` in your local browser. 
\ No newline at end of file diff --git a/docs/requirements.txt b/docs/requirements.txt new file mode 100644 index 0000000..2f0ef65 --- /dev/null +++ b/docs/requirements.txt @@ -0,0 +1,13 @@ +sphinx_copybutton +sphinx_rtd_theme +sphinx_toolbox +torch +transformers +sentencepiece==0.1.96 +tqdm==4.62.2 +openprompt +loralib +decorator +rich +myst_parser +web.py \ No newline at end of file diff --git a/docs/source/_static/css/custom.css b/docs/source/_static/css/custom.css new file mode 100644 index 0000000..9cfdbbe --- /dev/null +++ b/docs/source/_static/css/custom.css @@ -0,0 +1,268 @@ +/* a, */ +.wy-menu-vertical header, +.wy-menu-vertical p.caption, +.wy-nav-top .fa-bars, +.wy-menu-vertical a:hover, + +/* Colors and text decoration. + For example, :black:`text in black` or :blink:`text blinking` in rST. */ + + /* .black { + color: black; +} + +.gray { + color: gray; +} + +.grey { + color: gray; +} + +.silver { + color: silver; +} + +.white { + color: white; +} + +.maroon { + color: maroon; +} + +.red { + color: red; +} + +.magenta { + color: magenta; +} + +.fuchsia { + color: fuchsia; +} + +.pink { + color: pink; +} + +.orange { + color: rgba(218, 135, 12, 0.897); +} */ + +/* .string { + color: rgb(172, 51, 44); +} */ + +/* .yellow { + color: yellow; +} + +.lime { + color: lime; +} + +.green { + color: green; +} + +.olive { + color: olive; +} + +.teal { + color: teal; +} + +.cyan { + color: cyan; +} + +.aqua { + color: aqua; +} + +.blue { + color: blue; +} + +.navy { + color: navy; +} + +.purple { + color: purple; +} + +.under { + text-decoration: underline; +} + +.over { + text-decoration: overline; +} + +.blink { + text-decoration: blink; +} + +.line { + text-decoration: line-through; +} + +.strike { + text-decoration: line-through; +} + +.it { + font-style: italic; +} + +.ob { + font-style: oblique; +} + +.small { + font-size: small; +} + +.large { + font-size: large; +} + +.smallpar { + font-size: small; +} */ + +a:link { + color: rgb(141, 99, 224) +} + +a:visited { + color: rgb(141, 99, 224) +} + +a:hover { + color: rgb(147, 47, 218) +} +.rst-content code.literal +{ + color: rgb(172, 49, 42) !important; + /* #5360f0 */ +} + +.rst-content tt.literal +{ + color: #f06b53 !important; +} +/* #a153f0 */ +/* inspired by sphinx press theme */ +.wy-menu.wy-menu-vertical li.toctree-l1.current > a { + border-left: solid 15px rgb(150, 92, 232) !important; + text-indent: -15px; + border-top: none; + border-bottom: none; +} + +.wy-menu.wy-menu-vertical li.toctree-l1.current > ul { + border-left: solid 15px #ddcaf7 !important; +} +/* inspired by sphinx press theme */ + +.wy-nav-side { + color: unset !important; + background: unset !important; + border-right: solid 1px #ccc !important; +} + +.wy-side-nav-search, +.wy-nav-top, +.wy-menu-vertical li, +.wy-menu-vertical li a:hover, +.wy-menu-vertical li a +{ + background: unset !important; +} + +.wy-menu-vertical li.current a { + border-right: unset !important; +} + +.wy-side-nav-search div, +.wy-menu-vertical a { + color: #404040 !important; +} + +.wy-menu-vertical button.toctree-expand { + color: #333 !important; +} + +.wy-nav-content { + max-width: unset; +} + +.rst-content { + max-width: 900px; +} + +.wy-nav-content .icon-home:before { + content: "Docs"; +} + +.wy-side-nav-search .icon-home:before { + content: ""; +} + +dl.field-list { + display: block !important; +} + +dl.field-list > dt:after { + content: "" !important; +} + +dl.field-list > dt { + display: table; + padding-left: 6px !important; + padding-right: 6px !important; + margin-bottom: 4px 
!important; + padding-bottom: 1px !important; + background: rgb(252, 237, 208); + border-left: solid 2px rgb(231, 181, 134); +} + + +dl.py.class>dt +{ + color: rgba(17, 16, 17, 0.822) !important; + background: rgb(247, 234, 252) !important; + border-top: solid 2px #b620d0 !important; +} + +dl.py.method>dt +{ + background: rgb(250, 239, 241) !important; + border-left: solid 2px rgb(199, 83, 106) !important; +} + +dl.py.attribute>dt, +dl.py.property>dt +{ + background: rgba(194, 233, 248, 0.1) !important; + border-left: solid 2px #58b5cc !important; +} + +.fa-plus-square-o::before, .wy-menu-vertical li button.toctree-expand::before, +.fa-minus-square-o::before, .wy-menu-vertical li.current > a button.toctree-expand::before, .wy-menu-vertical li.on a button.toctree-expand::before +{ + content: ""; +} + +.rst-content .viewcode-back, +.rst-content .viewcode-link +{ + font-size: 120%; +} + + diff --git a/docs/source/_static/js/custom.js b/docs/source/_static/js/custom.js new file mode 100644 index 0000000..489b7d5 --- /dev/null +++ b/docs/source/_static/js/custom.js @@ -0,0 +1,7 @@ +document.addEventListener("DOMContentLoaded", function(event) { + document.querySelectorAll(".wy-menu.wy-menu-vertical > ul.current > li > a").forEach(a => a.addEventListener("click", e=>{ + f = document.querySelector(".wy-menu.wy-menu-vertical > ul.current > li > ul") + if (f.style.display=='none') { f.style.display='block'; } else f.style.display = 'none' + })); + document.querySelectorAll(".headerlink").forEach(a => a.text="\u{1F517}"); +}); \ No newline at end of file diff --git a/docs/source/conf.py b/docs/source/conf.py new file mode 100644 index 0000000..8408041 --- /dev/null +++ b/docs/source/conf.py @@ -0,0 +1,144 @@ +# Configuration file for the Sphinx documentation builder. +# +# This file only contains a selection of the most common options. For a full +# list see the documentation: +# https://www.sphinx-doc.org/en/master/usage/configuration.html + +# -- Path setup -------------------------------------------------------------- + +# If extensions (or modules to document with autodoc) are in another directory, +# add these directories to sys.path here. If the directory is relative to the +# documentation root, use os.path.abspath to make it absolute, like shown here. +# +# import os +# import sys +# sys.path.insert(0, os.path.abspath('.')) +import sys +sys.path.insert(0, "../../") +import datetime +import sphinx_rtd_theme +import doctest +import opendelta +import opendelta.delta_models + +# -- Project information ----------------------------------------------------- + +project = 'OpenDelta' +author = 'THUNLP OpenDelta Team' +copyright = '{}, {}, Licenced under the Apache License, Version 2.0'.format(datetime.datetime.now().year, author) + + +# The full version, including alpha/beta/rc tags +release = '0.1.1' +version = "0.1.1" + +html_theme = 'sphinx_rtd_theme' +html_theme_path = [sphinx_rtd_theme.get_html_theme_path()] + +doctest_default_flags = doctest.NORMALIZE_WHITESPACE +autodoc_member_order = 'bysource' +intersphinx_mapping = {'python': ('https://docs.python.org/', None), +"torch": ("https://pytorch.org/docs/stable/", None),} + +html_show_sourcelink = True + +# -- General configuration --------------------------------------------------- + +# Add any Sphinx extension module names here, as strings. They can be +# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom +# ones. 
+extensions = [ + 'sphinx.ext.autodoc', + 'sphinx.ext.autosummary', + 'sphinx.ext.doctest', + 'sphinx.ext.intersphinx', + 'sphinx.ext.mathjax', + 'sphinx.ext.napoleon', + 'sphinx.ext.viewcode', + 'sphinx.ext.githubpages', + 'sphinx_copybutton', + 'sphinx_toolbox.collapse', + 'myst_parser', +] + +myst_enable_extensions = [ + "html_image", + "colon_fence", + "html_admonition", + "amsmath", + "dollarmath", +] + +source_suffix = { + '.rst': 'restructuredtext', + '.txt': 'markdown', + '.md': 'markdown', +} + +# Add any paths that contain templates here, relative to this directory. +templates_path = ['_templates'] + +# List of patterns, relative to source directory, that match files and +# directories to ignore when looking for source files. +# This pattern also affects html_static_path and html_extra_path. +# exclude_patterns = [] + + +# -- Options for HTML output ------------------------------------------------- + +# The theme to use for HTML and HTML Help pages. See the documentation for +# a list of builtin themes. +# +# html_theme = 'alabaster' + +# Add any paths that contain custom static files (such as style sheets) here, +# relative to this directory. They are copied after the builtin static files, +# so a file named "default.css" will overwrite the builtin "default.css". +html_theme_options = { + # 'collapse_navigation': False, + # 'display_version': True, + #'logo_only': False, + 'navigation_depth': 2, +} + + +html_static_path = ['_static'] +html_css_files = ['css/custom.css'] +html_js_files = ['js/custom.js'] +rst_context = {'opendelta': opendelta} +# rst_epilog = "\n.. include:: .special.rst\n" +add_module_names = False + +def include_only_tagged(app, what, name, obj, skip, options): + inclusion_tag_format = "[NODOC]" #can be any pattern here, choose what works for you + for tag in app.tags.tags: + if obj.__doc__ is not None and not obj.__doc__.startswith(inclusion_tag_format): + return False + return True + +def skip2(app, what, name, obj, skip, options): + members = [ + '__init__', + '__repr__', + '__weakref__', + '__dict__', + '__module__', + ] + return True if name in members else skip + +def skip(app, what, name, obj, skip, options): + skip = include_only_tagged(app, what, name, obj, skip, options) or\ + skip2(app, what, name, obj, skip, options) + return skip + +def setup(app): + + + + def rst_jinja_render(app, docname, source): + src = source[0] + rendered = app.builder.templates.render_string(src, rst_context) + source[0] = rendered + + app.connect('autodoc-skip-member', skip) + app.connect("source-read", rst_jinja_render) \ No newline at end of file diff --git a/docs/source/imgs/afterfreeze.png b/docs/source/imgs/afterfreeze.png new file mode 100644 index 0000000..5d37408 Binary files /dev/null and b/docs/source/imgs/afterfreeze.png differ diff --git a/docs/source/imgs/bart-base.png b/docs/source/imgs/bart-base.png new file mode 100644 index 0000000..52b023d Binary files /dev/null and b/docs/source/imgs/bart-base.png differ diff --git a/docs/source/imgs/bert_vis.png b/docs/source/imgs/bert_vis.png new file mode 100644 index 0000000..a64c175 Binary files /dev/null and b/docs/source/imgs/bert_vis.png differ diff --git a/docs/source/imgs/bertdelta_noparam.png b/docs/source/imgs/bertdelta_noparam.png new file mode 100644 index 0000000..3f1cdf7 Binary files /dev/null and b/docs/source/imgs/bertdelta_noparam.png differ diff --git a/docs/source/imgs/bertdelta_vis.png b/docs/source/imgs/bertdelta_vis.png new file mode 100644 index 0000000..e21cf20 Binary files /dev/null and 
b/docs/source/imgs/bertdelta_vis.png differ diff --git a/docs/source/imgs/commonstructure_vis.png b/docs/source/imgs/commonstructure_vis.png new file mode 100644 index 0000000..e5db4e1 Binary files /dev/null and b/docs/source/imgs/commonstructure_vis.png differ diff --git a/docs/source/imgs/composition_of_delta.png b/docs/source/imgs/composition_of_delta.png new file mode 100644 index 0000000..b33a060 Binary files /dev/null and b/docs/source/imgs/composition_of_delta.png differ diff --git a/docs/source/imgs/defaultmodification.png b/docs/source/imgs/defaultmodification.png new file mode 100644 index 0000000..a729ccb Binary files /dev/null and b/docs/source/imgs/defaultmodification.png differ diff --git a/docs/source/imgs/hint-icon-2.jpg b/docs/source/imgs/hint-icon-2.jpg new file mode 100644 index 0000000..0d9a0c6 Binary files /dev/null and b/docs/source/imgs/hint-icon-2.jpg differ diff --git a/docs/source/imgs/hint-icon.png b/docs/source/imgs/hint-icon.png new file mode 100644 index 0000000..83ebb44 Binary files /dev/null and b/docs/source/imgs/hint-icon.png differ diff --git a/docs/source/imgs/interact.jpg b/docs/source/imgs/interact.jpg new file mode 100644 index 0000000..0cbcee7 Binary files /dev/null and b/docs/source/imgs/interact.jpg differ diff --git a/docs/source/imgs/multiple_to_one_layer.png b/docs/source/imgs/multiple_to_one_layer.png new file mode 100644 index 0000000..1df3e24 Binary files /dev/null and b/docs/source/imgs/multiple_to_one_layer.png differ diff --git a/docs/source/imgs/name_based_addressing.png b/docs/source/imgs/name_based_addressing.png new file mode 100644 index 0000000..c341a3d Binary files /dev/null and b/docs/source/imgs/name_based_addressing.png differ diff --git a/docs/source/imgs/plugunplug1.png b/docs/source/imgs/plugunplug1.png new file mode 100644 index 0000000..7dc17f1 Binary files /dev/null and b/docs/source/imgs/plugunplug1.png differ diff --git a/docs/source/imgs/plugunplug2.png b/docs/source/imgs/plugunplug2.png new file mode 100644 index 0000000..1330350 Binary files /dev/null and b/docs/source/imgs/plugunplug2.png differ diff --git a/docs/source/imgs/plugunplug3.png b/docs/source/imgs/plugunplug3.png new file mode 100644 index 0000000..3f6aa5d Binary files /dev/null and b/docs/source/imgs/plugunplug3.png differ diff --git a/docs/source/imgs/plugunplug4.png b/docs/source/imgs/plugunplug4.png new file mode 100644 index 0000000..a0a6e24 Binary files /dev/null and b/docs/source/imgs/plugunplug4.png differ diff --git a/docs/source/imgs/plugunplug5.png b/docs/source/imgs/plugunplug5.png new file mode 100644 index 0000000..c7b2dcc Binary files /dev/null and b/docs/source/imgs/plugunplug5.png differ diff --git a/docs/source/imgs/plugunplug6.png b/docs/source/imgs/plugunplug6.png new file mode 100644 index 0000000..7adf668 Binary files /dev/null and b/docs/source/imgs/plugunplug6.png differ diff --git a/docs/source/imgs/pointing-right-finger.png b/docs/source/imgs/pointing-right-finger.png new file mode 100644 index 0000000..6216065 Binary files /dev/null and b/docs/source/imgs/pointing-right-finger.png differ diff --git a/docs/source/imgs/raw_print.png b/docs/source/imgs/raw_print.png new file mode 100644 index 0000000..836b2de Binary files /dev/null and b/docs/source/imgs/raw_print.png differ diff --git a/docs/source/imgs/t5lora.png b/docs/source/imgs/t5lora.png new file mode 100644 index 0000000..1d78cdb Binary files /dev/null and b/docs/source/imgs/t5lora.png differ diff --git a/docs/source/imgs/todo-icon.jpeg b/docs/source/imgs/todo-icon.jpeg new 
file mode 100644 index 0000000..9846fc2 Binary files /dev/null and b/docs/source/imgs/todo-icon.jpeg differ diff --git a/docs/source/imgs/toy-delta.png b/docs/source/imgs/toy-delta.png new file mode 100644 index 0000000..ab32640 Binary files /dev/null and b/docs/source/imgs/toy-delta.png differ diff --git a/docs/source/imgs/transformers_structure.png b/docs/source/imgs/transformers_structure.png new file mode 100644 index 0000000..ded54d9 Binary files /dev/null and b/docs/source/imgs/transformers_structure.png differ diff --git a/docs/source/index.md b/docs/source/index.md new file mode 100644 index 0000000..e62cfb6 --- /dev/null +++ b/docs/source/index.md @@ -0,0 +1,54 @@ +OpenDelta's documentation! +===================================== + +OpenDelta is a **Plug-and-play** Library of the parameter-efficient fine-tuning ([delta-tuning](WhatisDelta)) technology for pre-trained models. + + +## Essential Advantages: + +- Clean: No need to edit the backbone PTM’s codes. +- Simple: Migrating from full-model tuning to delta-tuning needs as little as 3 lines of codes. +- Sustainable: Most evolution in external library doesn’t require a new OpenDelta. +- Extendable: Various PTMs can share the same delta-tuning codes. +- Flexible: Able to apply delta-tuning to (almost) any position of the PTMs. + +```{eval-rst} +.. toctree:: + :maxdepth: 1 + :caption: Getting Started + + notes/overview.md + notes/installation.md + notes/usage.md + notes/visualization.md + notes/saveload.md + +.. toctree:: + :maxdepth: 1 + :caption: Advanced Usage + + notes/keyfeature.md + notes/unifyname.md + notes/autodelta.md + notes/composition.md + notes/pluginunplug.md + notes/acceleration.md + notes/explored_config.md + notes/citation.md + +.. toctree:: + :maxdepth: 2 + :caption: Package Reference + + modules/base + modules/deltas + modules/auto_delta + modules/utils + + +Indices and tables +================== + +* :ref:`genindex` + +``` \ No newline at end of file diff --git a/docs/source/modules/auto_delta.rst b/docs/source/modules/auto_delta.rst new file mode 100644 index 0000000..cc9d3d4 --- /dev/null +++ b/docs/source/modules/auto_delta.rst @@ -0,0 +1,14 @@ +Auto Classes +====================================== + + +AutoDeltaConfig +------------------------------------ +.. autoclass:: opendelta.auto_delta.AutoDeltaConfig + :members: + + +AutoDeltaModel +------------------------------------ +.. autoclass:: opendelta.auto_delta.AutoDeltaModel + :members: diff --git a/docs/source/modules/base.rst b/docs/source/modules/base.rst new file mode 100644 index 0000000..3a1a35e --- /dev/null +++ b/docs/source/modules/base.rst @@ -0,0 +1,14 @@ +Base Classes +====================================== + + +BaseDeltaConfig +------------------------------------ +.. autoclass:: opendelta.delta_configs.BaseDeltaConfig + :members: + + +DeltaBase +------------------------------------ +.. autoclass:: opendelta.basemodel.DeltaBase + :members: diff --git a/docs/source/modules/deltas.rst b/docs/source/modules/deltas.rst new file mode 100644 index 0000000..5a94fb6 --- /dev/null +++ b/docs/source/modules/deltas.rst @@ -0,0 +1,46 @@ +Delta Models +====================================== + + + +Lora +--------------------------------------- +.. autoclass:: opendelta.LoraModel + :members: + + + +BitFit +--------------------------------------- +.. autoclass:: opendelta.BitFitModel + :members: + + +Adapter +--------------------------------------- +.. 
autoclass:: opendelta.AdapterModel + :members: + + +LowRankAdapter +--------------------------------------- +.. autoclass:: opendelta.LowRankAdapterModel + :members: + + +Compacter +--------------------------------------- +.. autoclass:: opendelta.CompacterModel + :members: + + +Prefix tuning +------------------------------------ +.. autoclass:: opendelta.PrefixModel + :members: + + +Soft Prompt Tuning +------------------------------------ +.. autoclass:: opendelta.SoftPromptModel + :members: diff --git a/docs/source/modules/utils.md b/docs/source/modules/utils.md new file mode 100644 index 0000000..3d11305 --- /dev/null +++ b/docs/source/modules/utils.md @@ -0,0 +1,45 @@ +# Utils + + +## SaveLoadMixin + +```{eval-rst} +.. autoclass:: opendelta.utils.saving_loading_utils.SaveLoadMixin + :members: +``` + +## Visualization + + +```{eval-rst} +.. autoclass:: opendelta.utils.visualization.Visualization + :members: +``` + +## Structure Map +```{eval-rst} +.. autoclass:: opendelta.utils.structure_mapping.CommonStructureMap + :members: +``` + +## Utility Functions + +### Hashing +```{eval-rst} +.. automodule:: opendelta.utils.model_md5 + :members: +``` + +### Signature +```{eval-rst} +.. automodule:: opendelta.utils.signature + :members: +``` + +### Named-based addressing +```{eval-rst} +.. automodule:: opendelta.utils.name_based_addressing + :members: +``` + + diff --git a/docs/source/notes/acceleration.md b/docs/source/notes/acceleration.md new file mode 100644 index 0000000..6b088b4 --- /dev/null +++ b/docs/source/notes/acceleration.md @@ -0,0 +1,6 @@ + +(acceleration)= +# OpenDelta+ + We are working on testing and improving the functionality with work with other acceleration packages for model training and inference. For example, [deepspeed](https://github.com/microsoft/DeepSpeed), [BMInf](https://github.com/OpenBMB/BMInf). + +Feel free to contact us via email (shengdinghu@gmail.com) if you have any suggestion. diff --git a/docs/source/notes/autodelta.md b/docs/source/notes/autodelta.md new file mode 100644 index 0000000..eb9fb4e --- /dev/null +++ b/docs/source/notes/autodelta.md @@ -0,0 +1,67 @@ +(autodelta)= +# AutoDelta Mechanism + +Inspired by [Huggingface transformers AutoClasses](https://huggingface.co/docs/transformers/v4.16.2/en/model_doc/auto#transformers.AutoModel) , we provide an AutoDelta features for the users to + +1. Easily to experiment with different delta models +2. Fast deploy from configuration file, especially from the repos in [DeltaHub](https://huggingface.co/DeltaHub). + + +## Easily load from dict, so that subject to change the type of delta models. 
+
+```python
+from opendelta import AutoDeltaConfig, AutoDeltaModel
+from transformers import T5ForConditionalGeneration
+
+backbone_model = T5ForConditionalGeneration.from_pretrained("t5-base")
+```
+
+We can load a config from a dict:
+```python
+config_dict = {
+    "delta_type": "lora",
+    "modified_modules": [
+        "SelfAttention.q",
+        "SelfAttention.v",
+        "SelfAttention.o"
+    ],
+    "lora_r": 4}
+delta_config = AutoDeltaConfig.from_dict(config_dict)
+```
+
+Then use the config to add a delta model to the backbone model:
+```python
+delta_model = AutoDeltaModel.from_config(delta_config, backbone_model=backbone_model)
+
+# now visualize the modified backbone_model
+from opendelta import Visualization
+Visualization(backbone_model).structure_graph()
+```
+
+
+````{collapse} Click to view output
+```{figure} ../imgs/t5lora.png
+---
+width: 600px
+name: t5lora
+---
+```
+````
+
+
+
+## Fast deploy from a finetuned delta checkpoint on DeltaHub
+
+```python
+delta_model = AutoDeltaModel.from_finetuned("DeltaHub/sst2-t5-base", backbone_model=backbone_model) # TODO: the link may change.
+```
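+
+The same interfaces also work with a delta checkpoint you have saved yourself (see [Save and Share the Delta](saveload)). Below is a minimal sketch of that round trip; the local directory name is just a placeholder of your own choosing.
+
+```python
+# Hypothetical local path: save a delta you trained yourself, then
+# redeploy it onto a freshly loaded backbone model.
+delta_model.save_finetuned("./my_local_delta")
+
+fresh_backbone = T5ForConditionalGeneration.from_pretrained("t5-base")
+delta_model = AutoDeltaModel.from_finetuned("./my_local_delta", backbone_model=fresh_backbone)
+```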
+

**Hash checking**

+Since the delta model only works together with the backbone model,
+we automatically check whether you load the delta model in the same way as it was trained.
+

+

+We calculate the trained model's [md5](http://some_link) and save it to the config. After loading the delta model, we re-calculate the md5 to see whether it has changed.
+

Pass `check_hash=False` to disable the hash checking.

+
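+For intuition, the check amounts to hashing the backbone parameters before and after loading. The helper below is only an illustration of the idea, not OpenDelta's actual implementation:
+
+```python
+import hashlib
+import torch.nn as nn
+
+def backbone_md5(module: nn.Module) -> str:
+    # Hash the raw bytes of every parameter in a fixed order, so that any
+    # change to the backbone weights yields a different digest.
+    md5 = hashlib.md5()
+    for name, param in sorted(module.state_dict().items()):
+        md5.update(name.encode("utf-8"))
+        md5.update(param.detach().cpu().numpy().tobytes())
+    return md5.hexdigest()
+```
+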
\ No newline at end of file diff --git a/docs/source/notes/citation.md b/docs/source/notes/citation.md new file mode 100644 index 0000000..4c41201 --- /dev/null +++ b/docs/source/notes/citation.md @@ -0,0 +1,3 @@ +# Citation + + We are working on a technical report. \ No newline at end of file diff --git a/docs/source/notes/composition.md b/docs/source/notes/composition.md new file mode 100644 index 0000000..151aa37 --- /dev/null +++ b/docs/source/notes/composition.md @@ -0,0 +1,52 @@ +(composition)= +# Composition of delta models + +With OpenDelta, you can perform compostion of different delta models. + + +### Add different deltas to the backbone + +``` +from transformers import AutoModelForSequenceClassification +model = AutoModelForSequenceClassification.from_pretrained("roberta-base") +from opendelta import LoraModel, AdapterModel +delta_model = LoraModel(backbone_model=model, modified_modules=['key'], lora_r=1) +delta_model2 = AdapterModel(backbone_model=model, modified_modules=['output'], bottleneck_dim=12) +delta_model.log() +``` +````{collapse} Click to view output +```{figure} ../imgs/composition_of_delta.png +--- +width: 600px +name: defaultmodification +--- +``` +```` + + + +### Even add multiple delta to the same layer + +``` +from transformers import AutoModelForSequenceClassification +model = AutoModelForSequenceClassification.from_pretrained("facebook/bart-base") +from opendelta import AdapterModel, LowRankAdapterModel +delta_model = AdapterModel(backbone_model=model, modified_modules=['fc2']) +delta_model2 = AdapterModel(backbone_model=model, modified_modules=['fc2'], bottleneck_dim=12) +delta_model3 = LowRankAdapterModel(backbone_model=model, modified_modules=['fc2'], reduction_factor=12) +delta_model.log() +``` +````{collapse} Click to view output +```{figure} ../imgs/multiple_to_one_layer.png +--- +width: 600px +name: defaultmodification +--- +``` +```` +:::{admonition} Order of Insertion +:class: warning +**When adding to the same layer, please pay attention to the order of adding delta.** As the above example, adapter is added after the `fc2`, the tensor will first go through `adapter` then go through `adapter_1`, at last `compacter`. If the delta is added before the backbone layer, then the last added delta will be the first to go through. + +Also, pay attention to the detaching order. The delta that is first added should be the last to be detached. +::: \ No newline at end of file diff --git a/docs/source/notes/explored_config.md b/docs/source/notes/explored_config.md new file mode 100644 index 0000000..34bd1f4 --- /dev/null +++ b/docs/source/notes/explored_config.md @@ -0,0 +1,11 @@ +(favoredconfiguration)= +# Favored Configuration + + We will add the commonly used configuration of delta models HERE in future. + +E.g. +- the modified_modules (position of delta), +- hyperparameter that are the most efficient +- the favored composition between delta models + +Currenlty, use the default setting, explore it by yourself, or refer to existing papers' configuration! \ No newline at end of file diff --git a/docs/source/notes/installation.md b/docs/source/notes/installation.md new file mode 100644 index 0000000..967aa43 --- /dev/null +++ b/docs/source/notes/installation.md @@ -0,0 +1,24 @@ + +(installation)= +# Installation + + +OpenDelta is tested on on [Python 3.8](https://www.python.org/) and [Pytorch 1.9](). 
+ +```bash +pip install opendelta +``` + +or from the source +```bash +git clone +cd OpenDelta +python setup.py install +``` + +If you want to do some modifications on the code for your research, run +```bash +git clone +cd OpenDelta +python setup.py develop +``` \ No newline at end of file diff --git a/docs/source/notes/keyfeature.md b/docs/source/notes/keyfeature.md new file mode 100644 index 0000000..b79367a --- /dev/null +++ b/docs/source/notes/keyfeature.md @@ -0,0 +1,200 @@ +(keyfeature)= +# Philosophy and Key Features + +:::{admonition} Plug-and-play Design. +:class: tip + +Existing open-source project to propogate this **''delta-tuning''** paradigm includes +AdapterHub, which copies the transformers code base and modify on it, which makes it unintuitive to transfer from a normal code base to a delta-tuning ones. + +OpenDelta approaches this problem via a **true plug-and-play** fashion to the PLMs. To migrate from a full-model finetuning training scripts to a delta tuning training scripts, you **DO NOT** need to change the backbone bone model code base to an adapted code base. +::: + + +Here is how we achieve it. + + **Read through it will also help you to implement your own delta models in a sustainable way.** + +(namebasedaddr)= +## 1. Name-based submodule addressing. +We locate the submodules that we want to apply a delta layer via name-based addressing. + +In pytorch fashion, a submodule can be accessed from a root model via 'dot' addressing. For example, we define a toy language model + +```python +import torch.nn as nn +class MyNet1(nn.Module): + def __init__(self,): + super().__init__() + self.name_a = nn.Linear(5,5) + def forward(self, hiddens): + return self.name_a(hiddens) + +class MyNet2(nn.Module): + def __init__(self,): + super().__init__() + self.embedding = nn.Embedding(10,5) + self.name_b = nn.Sequential(MyNet1(), MyNet1()) + def forward(self, input_ids): + hiddens = self.embedding(input_ids) + return self.name_b(hiddens) + +root = MyNet2() +print(root.name_b[0].name_a) +# Linear(in_features=5, out_features=5, bias=True) +``` + +We can visualize the model (For details, see [visualization](visualization)) + +```python +from opendelta import Visualization +Visualization(root).structure_graph() +``` + +````{collapse} Click to view output +```{figure} ../imgs/name_based_addressing.png +--- +width: 500px +name: name_based_addressing +--- +``` +```` + +In this case, string `"name_b.0.name_a"` will be the name to address the submodule from the root model. + +Thus when applying a delta model to this toy net. + +``` +from opendelta import AdapterModel +AdapterModel(backbone_model=root, modified_modules=['name_b.0.name_a']) +Visualization(root).structure_graph() +``` + +````{collapse} Click to view output +```{figure} ../imgs/toy-delta.png +--- +width: 500px +name: toy-delta +--- +``` +```` + +### Makes addressing easier. + +Handcrafting the full names of submodules can be frustrating. We made some simplifications + +1. End-matching Rules. + + OpenDelta will take every modules that + **ends with** the provided name suffix as the modification [target module](target_module). + :::{admonition} Example + :class: tip + Taking DistilBert with an classifier on top as an example: + - set to `["0.attention.out_lin"]` will add delta modules to the attention output of distilbert's + ayer 0, i.e., `distilbert.transformer.layer.0.attention.out_lin`. + - set to `["attention.out_lin"]` will add the delta modules in every layer's `attention.out_lin`. + ::: + + +2. Regular Expression. 
+ Unit test and Doc later. + +3. Interactive Selection. + + We provide a way to interact visually to select modules needed. + + ```python + from transformers import BertForMaskedLM + model = BertForMaskedLM.from_pretrained("bert-base-cased") + # suppose we load BERT + + from opendelta import LoraModel # use lora as an example, others are same + delta_model = LoraModel(backbone_model=model, interactive_modify=True) + ``` + + by setting `interactive_modify`, a web server will be opened on local host, and the link will be print in the terminal. + + ``` + http://0.0.0.0:8888/ + ``` + + If on your local machine, click to open the link for interactive modification. + + If on remote host, you could use port mapping. For example, vscode terminal will automatically do port mapping for you, you can simply use `control/command + click` to open the link. + + You can change the port number in case the default port number is occupied by other program by setting `interactive_modify=port_number`, in which port_number is an integer. + + The web page looks like the following figure. + + ```{figure} ../imgs/interact.jpg + --- + width: 500px + name: interact web page + --- + ``` + + - By clicking on `[+]`/`[-]` to expand / collapse tree nodes. + + - By clicking on text to select tree nodes, **yellow dotted** box indicates the selection. + + - **Double** click on the pink `[*]` is an advanced option to unfold the repeated nodes. By default, modules with the same architecture are folded into one node and are marked in red, for example, the `BertLayer` of layers 0~11 in the above figure are in the same structure. Regular model changes will make the same changes to each layers. + + - If you want to change only a few of them, first double-click on `[*]`, then select the parts you want in the unfolded structure. + + - If you want to make the same change to all but a few of them, first select the common parts you want in the folded structure, then double-click on `[*]` to remove the few positions you don't need to change in the expanded structure. + + Click `submit` button on the top-right corner, then go back to your terminal, you can get a list of name-based addresses printed in the terminal in the following format, and these modules are being "delta". + + ``` + modified_modules: + [bert.encoder.layer.0.output.dense, ..., bert.encoder.layer.11.output.dense] + ``` + +## 2. Three basic submodule-level delta operations. +We use three key functions to achieve the modifications to the backbone model outside the backbone model's code. + +1. **unfreeze some paramters** + + Some delta models will unfreeze a part of the model parameters and freeze other parts of the model, e.g. [BitFit](https://arxiv.org/abs/2106.10199). For these methods, just use [freeze_module](opendelta.basemodel.DeltaBase.freeze_module) method and pass the delta parts into `exclude`. + +2. **replace an module** + + Some delta models will replace a part of the model with a delta model, i.e., the hidden states will no longer go through the original submodules. This includes [Lora](https://arxiv.org/abs/2106.09685). + For these methods, we have an [update_module](opendelta.basemodel.DeltaBase.replace_module) interface. + +3. **insertion to the backbone** + + - **sequential insertion** + + Most adapter model insert a new adapter layer after/before the original transformers blocks. 
For these methods, insert the adapter's forward function after/before the original layer's forward function using [insert_sequential_module](opendelta.basemodel.DeltaBase.insert_sequential_module) interface. + - **parallel insertion** + + Adapters can also be used in a parallel fashion (see [Paper](https://arxiv.org/abs/2110.04366)). + For these methods, use [insert_parallel_module](opendelta.basemodel.DeltaBase.insert_parrellel_module) interface. + + +:::{admonition} Doc-preserving Insertion +:class: note +In the insertion operations, the replaced forward function will inherit the doc strings of the original functions. +::: + +## 3. Pseudo input to initialize. +Some delta models, especially the ones that is newly introduced into the backbone, will need to determine the parameters' shape. To get the shape, we pass a pseudo input to the backbone model and determine the shape of each delta layer according to the need of smooth tensor flow. + +:::{admonition} Pseudo Input +:class: warning +Most models in [Huggingface Transformers](https://huggingface.co/docs/transformers/index) have an attribute [dummy_inputs](https://github.com/huggingface/transformers/blob/v4.16.2/src/transformers/modeling_utils.py#L464). This will create a nonsensical input with the correct format to pass into the model's forward function. + +For the models that doesn't inherit/implement this attributes, we assume the pseudo input to the model is something like `input_id`, i.e., an integer tensor. +```python +pseudo_input = torch.tensor([[0,0,0]]) +# or +pseudo_input = torch.tensor([0,0,0]) +``` + We will add interface to allow more pseudo input in the future. +::: + + + + + diff --git a/docs/source/notes/knownissue.md b/docs/source/notes/knownissue.md new file mode 100644 index 0000000..139597f --- /dev/null +++ b/docs/source/notes/knownissue.md @@ -0,0 +1,2 @@ + + diff --git a/docs/source/notes/overview.md b/docs/source/notes/overview.md new file mode 100644 index 0000000..ccec827 --- /dev/null +++ b/docs/source/notes/overview.md @@ -0,0 +1,36 @@ +# What is Delta-tuning and Why OpenDelta? + +(WhatisDelta)= +:::{admonition} What is Delta? +:class: tip + +As Pre-trained language models (PLMs) have become the fundamental infrastructure on many NLP tasks and benchmarks, it is becoming increasingly clear from recent research that **larger models tend to lead to better performance**. However, large-scale PLMs also bring prohibitive adaptation costs when fine-tuning all the parameters of a model and retaining separate instances for different tasks. + +**Parameter-efficient model stimulation methods** thus have attracted researchers' eyes, which only tune a small fraction of model parameter while achieving comparable or even better performance than full-model fine-tuning, dubbed as "Delta-tuning". + +**Delta** thus means a small fraction $\Delta\Theta$ of parameters besides the pretrained models $\Theta_0$. + +\begin{gather*} +\Theta \sim \Theta_0\text{(frozen)} + \Delta\Theta\text{(tunable)} +\end{gather*} + +This open-source project implement several delta-tuning methods, which allows researchers and engineers to quickly migrate their codes from full-model tuning to delta-tuning without replace the backend (the implementation of the backbone PLM). +::: + + + +## Why OpenDelta? + +- Clean: No need to edit the backbone PTM’s codes. +- Simple: Migrating from full-model tuning to delta-tuning needs as little as 3 lines of codes. +- Sustainable: Most evolution in external library doesn’t require a new OpenDelta. 
+- Extendable: Various PTMs can share the same delta-tuning codes. +- Flexible: Able to apply delta-tuning to (almost) any position of the PTMs. + + +## Delta-tuning papers + + + + + diff --git a/docs/source/notes/pluginunplug.md b/docs/source/notes/pluginunplug.md new file mode 100644 index 0000000..eeadd57 --- /dev/null +++ b/docs/source/notes/pluginunplug.md @@ -0,0 +1,113 @@ +# Multitask Modeling using OpenDelta + +:::{admonition} Multitask Serving with Delta-tuning +:class: tip +A huge advange of Delta-tuning is that it can be used for multitask serving. +Imagine we have a pretrained model trained on a mix of data coming from multiple languages, e.g.,English, Chinese, and French. Now you want to have seperate models that specialise in Chinese, French, English. We can thus delta-tune three deltas on each language with small amount of additional language-specific data. During serving, when a Chinese sentence comes, you attach the "Chinese Delta", and next a French sentence comes, you detach the "Chinese Delta", and attach a "French Delta". +::: + +**Here is how to achieve multitask serving using OpenDelta.** + +```python +from transformers import AutoModelForSequenceClassification +model = AutoModelForSequenceClassification.from_pretrained("facebook/bart-base") +from opendelta import LoraModel +delta_model = LoraModel(backbone_model=model, modified_modules=['fc2']) +delta_model.log() +``` +````{collapse} Click to view output +```{figure} ../imgs/plugunplug1.png +--- +width: 800px +name: defaultmodification +--- +``` +```` + +Now we detach the deltas from the backbone +```python +delta_model.detach() +delta_model.log() +``` +````{collapse} Click to view output +```{figure} ../imgs/plugunplug2.png +--- +width: 800px +name: defaultmodification +--- +``` +```` + +We can reattach the deltas to the backbone +```python +delta_model.attach() +delta_model.log() +``` + +````{collapse} Click to view output +```{figure} ../imgs/plugunplug3.png +--- +width: 800px +name: defaultmodification +--- +``` +```` + +:::{admonition} Independence of Different Delta Models +:class: note +Different delta models will be independent in detaching and attaching. +(But the visualization will not show all deltas in the backbone model.) +```python +# continue from the above example +from opendelta import AdapterModel +delta_model2 = AdapterModel(backbone_model=model, modified_modules=['fc1']) +delta_model2.log() +``` +````{collapse} Click to view output +```{figure} ../imgs/plugunplug4.png +--- +width: 800px +name: defaultmodification +--- +``` +```` + +detach the lora delta +```python +delta_model.detach() # detach the lora delta +delta_model.log() +``` +````{collapse} Click to view output +```{figure} ../imgs/plugunplug5.png +--- +width: 800px +name: defaultmodification +--- +``` +```` + +detach the adapter delta and reattach the lora delta +```python +delta_model2.detach() # detach the adapter delta +delta_model.attach() # reattach the lora delta +delta_model.log() +``` +````{collapse} Click to view output +```{figure} ../imgs/plugunplug6.png +--- +width: 800px +name: defaultmodification +--- +``` +```` +::: + + +:::{admonition} BitFit not supported +:class: warning + Currently detach is not suitable for BitFit, which modify the requires_grad property. Please wait for future releases. 
+::: + + + + diff --git a/docs/source/notes/saveload.md b/docs/source/notes/saveload.md new file mode 100644 index 0000000..ecddd23 --- /dev/null +++ b/docs/source/notes/saveload.md @@ -0,0 +1,98 @@ +(saveload)= +# Save and Share the Delta + +## Space efficient saving without changing the code. +After a modified backbone model is trained, you can save only trained part without change to any code, because **the state dict of the backbone model has been changed to the trainable parts** + +```python +from opendelta import CompacterModel +from transformers import BertForMaskedLM +backbone_model = BertForMaskedLM.from_pretrained("bert-base-uncased") +delta_model = CompacterModel(backbone_model) # modify the default modules. + +# freeze module +delta_model.freeze_module(exclude=["deltas"], set_state_dict=True) +# or +delta_model.freeze_module(exclude=["deltas"]) +``` +### save the checkpoint. +now save the backbone_model in normal way, and the checkpoint is **very space efficient**. + +```python +# ... +# After some training pipeline +# ... +torch.save(backbone_model.state_dict(), "delta.ckpt") + +# the checkpoint size +import os +print("checkpoint size: {:.2f}M".format(os.path.getsize("delta.ckpt")/1024**2)) +# checkpoint size: 0.32M +``` + +### load the checkpoint. +In order to load the checkpoint, you should make sure the backbone model is a modified ones (so that it can take in the delta parameters). +Then load the checkpoint with `strict=False`. +```python +backbone_model.load_state_dict(torch.load("delta.ckpt"), strict=False) +# this will return long string of warning about the 'missing key'. +# if you want to supress it, use +# _ = backbone_model.load_state_dict(torch.load("delta.ckpt"), strict=False) +``` + +## Save/Load the entire model after training. + +### save a delta model. +```python +delta_model.save_finetuned("delta_model") +# Configuration saved in delta_model/config.json +# Model weights saved in delta_model/pytorch_model.bin +``` +This will save all the trained parameters and the configuration of the delta model to path `delta_model/` + +### load a delta model. + +```python +backbone_model = BertForMaskedLM.from_pretrained("bert-base-uncased") +delta_model.from_finetuned("delta_model", backbone_model, local_files_only=True) +# passing local_files_only=True will save the time of checking in the web. +``` + +## Share or download a model to/from the community. + +### Share. +```python +delta_model.save_finetuned("test_delta_model", push_to_hub = True) +``` + +### Download from community. +```python +from transformers import AutoModelForSeq2SeqLM +t5 = AutoModelForSeq2SeqLM.from_pretrained("t5-base") +from opendelta import AutoDeltaModel +delta = AutoDeltaModel.from_finetuned("DeltaHub/lora_t5-base_mrpc", backbone_model=t5) +delta.log() +``` + +
+

**Push to Hub**

+

 Currently we only provide the option to push to the Hugging Face model hub.

+

 Before pushing to the hub, you may need to register an account on Hugging Face. You can refer to this [tutorial about model sharing and uploading](https://huggingface.co/docs/transformers/model_sharing).
+

+

 In some cases, your checkpoint may still be too large for git, so please install [`git-lfs`](https://git-lfs.github.com).
+

+
+ +:::{admonition} **Sharing with the Community** +:class: tip +If you are satisfied with your checkpoint, do not forget to share your model to DeltaHub: +1. Add yourself to DeltaHub with the [public link](https://huggingface.co/organizations/DeltaHub/share/QzkBuLSmlVnNhQqHYnekoTXwSRkoRHBwZA) +2. Be sure to edit your model card to clearly illustrate the delta model before you share. +3. Click `setting` on the model +4. Transfer the model in `rename or transfer this model` section. +::: + + +## Save & Load for Composition of Delta + + Currently save & load method is not suitable for [composition of delta model](compositon). Please wait for future releases. \ No newline at end of file diff --git a/docs/source/notes/unifyname.md b/docs/source/notes/unifyname.md new file mode 100644 index 0000000..0b3f485 --- /dev/null +++ b/docs/source/notes/unifyname.md @@ -0,0 +1,82 @@ +(unifyname)= + +# Unified Name Convention + +```{figure} ../imgs/transformers_structure.png +:width: 400px +:name: transformers_structure +``` + +Although different PTMs often share similar Transformers structures, the codebases, and most importantly, the variable names for each submodule, are quite different. + + + +On the one hand, we **encourage the users to first [visualize](visualization) the PTMs' structure and then determine the name of submoduels.** + +On the other hand, we designed a unified name convention of Transformer Structure, and provided several structure mapping from the original name to the unified name convention. + +In this section, we will illustrate the unified name convention and structure mapping. + + +## Common blocks in Transformers structure. + + +- embeddings (word embedding) +- encoder + - block + - $ (layer_id) + - attn + - q, k, v + - proj + - layer_norm + - ff + - w1 + - w2 + - layer_norm +- decoder (similar to encoder) +- lm_head + - proj + +Visualize bert-base using a common structure name: The submodules that are not common are grey. + +```{figure} ../imgs/commonstructure_vis.png +:width: 600px +:name: transformers_structure +``` + +(commonstructure)= +## Mappings + +Example of bert mapping: a tree with node names specified by "\_\_name\_\_" +```json +{ + "bert.embeddings.word_embeddings": {"__name__":"embeddings"}, + "bert.embeddings.position_embeddings": {"__name__":""}, + "bert.embeddings.token_type_embeddings": {"__name__":""}, + "bert.embeddings.LayerNorm": {"__name__":""}, + "bert.encoder": {"__name__":"encoder", + "layer": {"__name__":"block", + "$": {"__name__":"$", + "attention": {"__name__":"attn", + "self.query": {"__name__":"q"}, + "self.key": {"__name__":"k"}, + "self.value": {"__name__":"v"}, + "output.dense": {"__name__":"proj"}, + "output.LayerNorm": {"__name__":"layer_norm"}, + }, + "output": {"__name__":"ff", + "dense": {"__name__":"w2"}, + "LayerNorm": {"__name__":"layer_norm"} + }, + "intermediate.dense": {"__name__":"ff.w1"}, + } + } + }, + "cls.predictions": {"__name__": "lm_head", + "transform.dense": {"__name__":""}, + "transform.LayerNorm": {"__name__":""}, + "decoder": {"__name__":"proj"}, + } +} +``` + diff --git a/docs/source/notes/usage.md b/docs/source/notes/usage.md new file mode 100644 index 0000000..4ddcf94 --- /dev/null +++ b/docs/source/notes/usage.md @@ -0,0 +1,137 @@ +(basics)= +# Basic Usage +Now we introduce the general pipeline to migrate your full-model tuning scripts to a delta tuning one. 
+ +## STEP 1: Load the pretrained models + +```python +from transformers import AutoModelForSequenceClassification +model = AutoModelForSequenceClassification.from_pretrained("facebook/bart-base") # suppose we load BART +``` + +## STEP 2: Add delta modules +We provide two alternatives to add the delta modules. +### 2.1 Modification based on visualization +Suppose we want to make the feedforward layer of each block as our [modification target module](target_module), +We should first know what is the name of the feedforward layer in the BART model by visualization. *For more about visualization, see [Visualization](visualization).* + +```python +from opendelta import Visualization +Visualization(model).structure_graph() +``` + +````{collapse} Click to view output +```{figure} ../imgs/bart-base.png +--- +width: 600px +name: bart-base +--- +``` +```` + + + + +We can see from the structure graph that the feed forward layer in Bart is called `model.encoder.layers.$.fc1` and `model.encoder.layers.$.fc2`, where +`$` represent a number from 0-5. Since we want to apply adapter after *all* the feed forward layers, we specify the `modified_modules=['fc2']`, which is the common suffix for feed forward layers. + *For details about the name based addressing, see [Name-based submodule addressing](namebasedaddr)* + +Other configurations, such as the `bottleneck_dim` in Adapter, can be passed as key word arguments. +```python +from opendelta import AdapterModel +delta_model = AdapterModel(backbone_model=model, modified_modules=['fc2'], bottleneck_dim=12) +delta_model.log() # This will visualize the backbone after modification and other information. +``` + +(target_module)= +:::{admonition} Target module +:class: note +For different delta methods, the operation for the modification target is different. +- Adapter based method: Insert at the target module's forward function. +- BitFit: Add bias to all allowed position of the target module. +- Lora: Substitute the all the linear layers of the target module with [Lora.Linear](https://github.com/microsoft/LoRA/blob/main/loralib/layers.py#L92). +::: + +### 2.2 Use the default modification. +We also provide the default modifications of each delta methods for some commonly used PTMs (e.g., BERT, RoBERTA, DistilBERT, T5, GPT2), so the users don't need to specify the submodules to modify. + +The default modifications is achieved by a [common_structure mapping](commonstructure), that is, use the mapping a name of a module to the it's name on a common transformer structure. *For details about the default modification, see [Unified Name Convention](unifyname)* + + + +```python +# a seperate example using BERT. +from transformers import BertForMaskedLM +from opendelta import AdapterModel +model = BertForMaskedLM.from_pretrained("bert-base-cased") +delta_model = AdapterModel(model) # This will apply adapter to the self-attn and feed-forward layer. +delta_model.log() +``` +````{collapse} Click to view output +```{figure} ../imgs/defaultmodification.png +--- +width: 600px +name: defaultmodification +--- +``` +```` + + + + +:::{admonition} Delta model vs Backbone model +:class: note +The delta_model **CAN NOT** be used alone, and its [forward](opendelta.basemodel.DeltaBase.forward) is canceled. The training pipeline should be conducted on the backbone model (In the above example, its the `model`). +::: + +:::{admonition} Try different positions +:class: tip +OpenDelta provide the flexibility to add delta to different positions on the backbone model. 
For example, If you want to move the adapter in the above example after the layer norm of the feed forward layer. The code should be changed into +```python +# continue with the BART example, but not used later. +delta_model = AdapterModel(backbone_model=model, modified_modules=['final_layer_norm'], bottleneck_dim=12) +``` +The performance may vary due to positional differences, but there is no academic guarantee that one will outperform the other. +::: + + +:::{admonition} Favored Configurations +:class: tip +Feel confused about the flexibility that OpenDelta brings? NO WORRY! We will add [Favored Configurations](favoredconfiguration) soon. +::: + +## STEP 3: Freezing parameters +The main part of the backbone model is not automatically frozen (We may add the option in future). To freeze the main part of the backbone model except the trainable parts (usually the delta paramters), use [freeze_module](opendelta.basemodel.DeltaBase.freeze_module) method. The `exclude` field obeys the same name-based addressing rules as the `modified_modules` field. + +```python +# continue with the BART example +delta_model.freeze_module(exclude=["deltas", "layernorm_embedding"], set_state_dict=True) +delta_model.log() +``` +````{collapse} Click to view output +```{figure} ../imgs/afterfreeze.png +--- +width: 600px +name: afterfreeze +--- +``` +```` +The `set_state_dict=True` will tell the method to change the `state_dict` of the `backbone_model` to maintaining only the trainable parts. + + +## STEP 4: Normal training pipeline + +The **model** then can be trained in traditional training scripts. Two things should be noticed: + +:::{admonition} Note +:class: note +1. No need to change the optimizer, since the optimizer will only calculated and store gradient for those parameters with `requires_grad=True`, and the `requires_grad` attribute has been changed during the call to [freeze_module](opendelta.basemodel.DeltaBase.freeze_module) method. +2. `model.eval()` or `model.train()` should be used when needed to set dropout, etc. Delta model doesn't touch those configuration. +::: +## STEP 5: Saved/Share the Delta Model + + *see [Save a delta model to local, or share with the community](saveload).* + + + + diff --git a/docs/source/notes/visualization.md b/docs/source/notes/visualization.md new file mode 100644 index 0000000..d873d8e --- /dev/null +++ b/docs/source/notes/visualization.md @@ -0,0 +1,125 @@ +(visualization)= +# Visualize the Parameters + +When OpenDelta makes modifications to a pretrained model (PTM), it is beneficial to know what your PTM looks like, especially the location of the parameters. + +- **Before** applying opendelta, you can know **how to specify your modifications in terms of key addressing**. +- **After** the modification is done, you can know **if your modification is what you expected**, for example, whether the position of the delta +modules are desired, or whether you froze the correct parameters. + +Now let's begin to try the visualization utility. + +## Visualization is NOT easy using pytorch native function. + +```python +from transformers import BertForMaskedLM +backbone_model = BertForMaskedLM.from_pretrained("bert-base-uncased") +print(backbone_model) +``` + +````{collapse} Click to view output +```{figure} ../imgs/raw_print.png +--- +width: 600px +name: raw_print +--- +``` +```` + +The original presentation of models is **not tailored for repeated structures, big models, or parameters-centric tasks**. + + +## Using visualization from opendelta. 

First, let's visualize all the parameters in the BERT model. As we can see, the structure inside a BERT model, and the locations of all its parameters, are neatly represented as a tree. (See the [color scheme](color_schema) for the colors.)

```python
from opendelta import Visualization
model_vis = Visualization(backbone_model)
model_vis.structure_graph()
```


```{figure} ../imgs/bert_vis.png
---
width: 600px
name: bert_vis
---
```

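If the full tree feels overwhelming, the same utility can be pointed at a part of the model. This is a minimal sketch, under the assumption that `Visualization` accepts any `torch.nn.Module` as its root (the submodule path follows the graph above):

```python
# Minimal sketch: zoom in on a single encoder layer instead of the whole model.
# Assumes Visualization accepts any torch.nn.Module as its root.
from transformers import BertForMaskedLM
from opendelta import Visualization

backbone_model = BertForMaskedLM.from_pretrained("bert-base-uncased")
Visualization(backbone_model.bert.encoder.layer[0]).structure_graph()
```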

**Suggestion**

We can easily reference a module according to the graph:
```python
print(backbone_model.bert.encoder.layer[0].intermediate)
```
When using opendelta on a new backbone model, it's better to first visualize the child module names (shown in white), and then designate the `modified_modules`.
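If you prefer a plain-text view over the rendered graph, PyTorch's built-in `named_modules()` yields the same dotted names. A small sketch (pure PyTorch, nothing OpenDelta-specific) for listing candidate names before choosing `modified_modules`:

```python
# List dotted submodule names (e.g. "bert.encoder.layer.0.intermediate") with plain PyTorch.
# These are the same names used for name-based addressing in modified_modules.
from transformers import BertForMaskedLM

backbone_model = BertForMaskedLM.from_pretrained("bert-base-uncased")
for name, module in backbone_model.named_modules():
    if name.endswith("intermediate"):  # filter by whatever suffix you care about
        print(name, type(module).__name__)
```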
+ + + + +## Now add a delta model and visualize the change. + + +```python +from opendelta import LowRankAdapterModel +delta_model = LowRankAdapterModel(backbone_model) +delta_model.freeze_module(exclude=["cls", "intermediate", "LayerNorm"]) +Visualization(backbone_model).structure_graph() +``` + +````{collapse} Click to view output +```{figure} ../imgs/bertdelta_vis.png +--- +width: 600px +name: bertdelta_vis +--- +``` +```` + +(color_schema)= +
+
**Color Schema**

- The white part is the name of the module.
- The green part is the module's type.
- The blue part is the tunable parameters, i.e., the parameters that require grad computation.
- The grey part is the frozen parameters, i.e., the parameters that do not require grad computation.
- The red part is the structure that is repeated and thus folded.
- The purple part is the delta parameters inserted into the backbone model.

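The blue/grey distinction can also be checked programmatically. A small sketch (pure PyTorch, continuing the example above where `freeze_module` has already been called on `backbone_model`):

```python
# Count tunable (blue) vs. frozen (grey) parameters of the backbone model.
tunable = sum(p.numel() for p in backbone_model.parameters() if p.requires_grad)
frozen = sum(p.numel() for p in backbone_model.parameters() if not p.requires_grad)
print(f"tunable: {tunable:,} | frozen: {frozen:,} "
      f"({100 * tunable / (tunable + frozen):.2f}% trainable)")
```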

:::{admonition} Platform Sensitivity
:class: warning
Depending on the platform the code is running on, the colors may vary slightly.
:::




## We also provide the option to visualize the nodes without parameters.

```python
Visualization(backbone_model).structure_graph(keep_non_params=True)
```

Thus, modules without parameters, such as dropout and activation layers, are also kept.


````{collapse} Click to view output
```{figure} ../imgs/bertdelta_noparam.png
---
width: 600px
name: bertdelta_noparam
---
```
````

:::{admonition} Order of the submodules
:class: warning
Currently, OpenDelta's Visualization renders the model based on PyTorch's named_modules method. That means the submodules are presented in the order they were added to the parent module, which is not necessarily the order tensors flow through.
::: \ No newline at end of file
diff --git a/examples/README.md b/examples/README.md new file mode 100644 index 0000000..1d4da65 --- /dev/null +++ b/examples/README.md @@ -0,0 +1,25 @@
# Use Examples

This repo mainly contains several running scripts that use OpenDelta to conduct parameter-efficient training on various tasks.

**Note that we suggest adding OpenDelta to your existing scripts, instead of modifying your scripts to match the following examples. OpenDelta itself doesn't restrict the training pipeline, nor does it provide one.**


## tutorial
Several toy tutorials:
1. The scripts for docs/basic_usage
2. Using interactive module selection
3. Work with [OpenPrompt](https://github.com/thunlp/OpenPrompt)

## examples_text-classification
Modifies a Hugging Face text-classification example into a delta tuning one.
Currently, GLUE datasets are supported in the scripts. RoBERTa-base is used for performance checking. Read the README.md inside the directory for detailed usage.

## examples_seq2seq
Modifies a Hugging Face sequence-to-sequence example into a delta tuning one.
Currently, SuperGLUE and GLUE datasets are supported in the scripts. T5-base is used for performance checking. Read the README.md inside the directory for detailed usage.


## examples_image-classification
A toy example of using OpenDelta with a computer vision pretrained model (ViT). Since ViT is an experimental feature in Hugging Face Transformers, this example is subject to change at any moment.

diff --git a/examples/examples_image-classification/README.md b/examples/examples_image-classification/README.md new file mode 100644 index 0000000..08e51bf --- /dev/null +++ b/examples/examples_image-classification/README.md @@ -0,0 +1,166 @@


# Use OpenDelta in the vision transformer (ViT)

This example uses the [huggingface image classification examples](), adding only several
lines to the original script.

## Usage
### 1. Install the necessary packages
```shell
pip install Pillow
pip install torchvision
pip install transformers==4.16.2
pip install datasets==1.18.0
```

### 2. Upgrade transformers to 4.10.0

### 3. Run
```bash
python run_image_classification.py configs/lora_beans.json
```

Do not forget to re-install datasets back to 1.17.0 for the other examples. :)


## Possible Errors
1. Dataset connection error

Solution 1: open a python console and run the failing command again; this may not always help.

Solution 2: download the dataset yourself on an internet-connected machine, save it to disk, transfer it to your server, and finally load it with load_from_disk.



# Image classification examples

The following examples showcase how to fine-tune a `ViT` for image-classification using PyTorch.
+ +## Using datasets from 🤗 `datasets` + +Here we show how to fine-tune a `ViT` on the [beans](https://huggingface.co/datasets/beans) dataset. + +👀 See the results here: [nateraw/vit-base-beans](https://huggingface.co/nateraw/vit-base-beans). + +```bash +python run_image_classification.py \ + --dataset_name beans \ + --output_dir ./beans_outputs/ \ + --remove_unused_columns False \ + --do_train \ + --do_eval \ + --push_to_hub \ + --push_to_hub_model_id vit-base-beans \ + --learning_rate 2e-5 \ + --num_train_epochs 5 \ + --per_device_train_batch_size 8 \ + --per_device_eval_batch_size 8 \ + --logging_strategy steps \ + --logging_steps 10 \ + --evaluation_strategy epoch \ + --save_strategy epoch \ + --load_best_model_at_end True \ + --save_total_limit 3 \ + --seed 1337 +``` + +Here we show how to fine-tune a `ViT` on the [cats_vs_dogs](https://huggingface.co/datasets/cats_vs_dogs) dataset. + +👀 See the results here: [nateraw/vit-base-cats-vs-dogs](https://huggingface.co/nateraw/vit-base-cats-vs-dogs). + +```bash +python run_image_classification.py \ + --dataset_name cats_vs_dogs \ + --output_dir ./cats_vs_dogs_outputs/ \ + --remove_unused_columns False \ + --do_train \ + --do_eval \ + --push_to_hub \ + --push_to_hub_model_id vit-base-cats-vs-dogs \ + --fp16 True \ + --learning_rate 2e-4 \ + --num_train_epochs 5 \ + --per_device_train_batch_size 32 \ + --per_device_eval_batch_size 32 \ + --logging_strategy steps \ + --logging_steps 10 \ + --evaluation_strategy epoch \ + --save_strategy epoch \ + --load_best_model_at_end True \ + --save_total_limit 3 \ + --seed 1337 +``` + +## Using your own data + +To use your own dataset, the training script expects the following directory structure: + +```bash +root/dog/xxx.png +root/dog/xxy.png +root/dog/[...]/xxz.png + +root/cat/123.png +root/cat/nsdf3.png +root/cat/[...]/asd932_.png +``` + +Once you've prepared your dataset, you can can run the script like this: + +```bash +python run_image_classification.py \ + --dataset_name nateraw/image-folder \ + --train_dir \ + --output_dir ./outputs/ \ + --remove_unused_columns False \ + --do_train \ + --do_eval +``` + +### 💡 The above will split the train dir into training and evaluation sets + - To control the split amount, use the `--train_val_split` flag. + - To provide your own validation split in its own directory, you can pass the `--validation_dir ` flag. + + +## Sharing your model on 🤗 Hub + +0. If you haven't already, [sign up](https://huggingface.co/join) for a 🤗 account + +1. Make sure you have `git-lfs` installed and git set up. + +```bash +$ apt install git-lfs +$ git config --global user.email "you@example.com" +$ git config --global user.name "Your Name" +``` + +2. Log in with your HuggingFace account credentials using `huggingface-cli` + +```bash +$ huggingface-cli login +# ...follow the prompts +``` + +3. When running the script, pass the following arguments: + +```bash +python run_image_classification.py \ + --push_to_hub \ + --push_to_hub_model_id \ + ... 
+``` \ No newline at end of file diff --git a/examples/examples_image-classification/configs/lora_beans.json b/examples/examples_image-classification/configs/lora_beans.json new file mode 100644 index 0000000..c39c522 --- /dev/null +++ b/examples/examples_image-classification/configs/lora_beans.json @@ -0,0 +1,30 @@ +{ + "report_to": "none", + "dataset_name": "beans", + "output_dir": "./beans_outputs/", + "do_train": true, + "do_eval": true, + "num_train_epochs": 5, + "remove_unused_columns": false, + "per_device_train_batch_size": 8, + "per_device_eval_batch_size": 8, + "logging_strategy": "steps", + "logging_steps": 10, + "evaluation_strategy": "epoch", + "save_strategy": "epoch", + "load_best_model_at_end": true, + "save_total_limit": 3, + "seed": 1337, + "delta_type": "lora", + "modified_modules": [ + "attention.query", + "attention.value" + ], + "unfrozen_modules": [ + "classifier", + "deltas" + ], + "overwrite_output_dir": true, + "learning_rate": 5e-4 + +} \ No newline at end of file diff --git a/examples/examples_image-classification/metric.py b/examples/examples_image-classification/metric.py new file mode 100644 index 0000000..798c111 --- /dev/null +++ b/examples/examples_image-classification/metric.py @@ -0,0 +1,89 @@ +# coding=utf-8 +# Copyright 2020 The HuggingFace Datasets Authors and the current dataset script contributor. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Accuracy metric.""" + +from sklearn.metrics import accuracy_score + +import datasets + + +_DESCRIPTION = """ +Accuracy is the proportion of correct predictions among the total number of cases processed. It can be computed with: +Accuracy = (TP + TN) / (TP + TN + FP + FN) +TP: True positive +TN: True negative +FP: False positive +FN: False negative +""" + +_KWARGS_DESCRIPTION = """ +Args: + predictions: Predicted labels, as returned by a model. + references: Ground truth labels. + normalize: If False, return the number of correctly classified samples. + Otherwise, return the fraction of correctly classified samples. + sample_weight: Sample weights. +Returns: + accuracy: Accuracy score. +Examples: + + >>> accuracy_metric = datasets.load_metric("accuracy") + >>> results = accuracy_metric.compute(references=[0, 1], predictions=[0, 1]) + >>> print(results) + {'accuracy': 1.0} +""" + +_CITATION = """\ +@article{scikit-learn, + title={Scikit-learn: Machine Learning in {P}ython}, + author={Pedregosa, F. and Varoquaux, G. and Gramfort, A. and Michel, V. + and Thirion, B. and Grisel, O. and Blondel, M. and Prettenhofer, P. + and Weiss, R. and Dubourg, V. and Vanderplas, J. and Passos, A. and + Cournapeau, D. and Brucher, M. and Perrot, M. 
and Duchesnay, E.}, + journal={Journal of Machine Learning Research}, + volume={12}, + pages={2825--2830}, + year={2011} +} +""" + + +@datasets.utils.file_utils.add_start_docstrings(_DESCRIPTION, _KWARGS_DESCRIPTION) +class Accuracy(datasets.Metric): + def _info(self): + return datasets.MetricInfo( + description=_DESCRIPTION, + citation=_CITATION, + inputs_description=_KWARGS_DESCRIPTION, + features=datasets.Features( + { + "predictions": datasets.Sequence(datasets.Value("int32")), + "references": datasets.Sequence(datasets.Value("int32")), + } + if self.config_name == "multilabel" + else { + "predictions": datasets.Value("int32"), + "references": datasets.Value("int32"), + } + ), + reference_urls=["https://scikit-learn.org/stable/modules/generated/sklearn.metrics.accuracy_score.html"], + ) + + def _compute(self, predictions, references, normalize=True, sample_weight=None): + return { + "accuracy": float( + accuracy_score(references, predictions, normalize=normalize, sample_weight=sample_weight) + ) + } \ No newline at end of file diff --git a/examples/examples_image-classification/requirements.txt b/examples/examples_image-classification/requirements.txt new file mode 100644 index 0000000..62bbb3c --- /dev/null +++ b/examples/examples_image-classification/requirements.txt @@ -0,0 +1,3 @@ +# torch>=1.5.0 +torchvision>=0.6.0 +datasets>=1.8.0 \ No newline at end of file diff --git a/examples/examples_image-classification/run_image_classification.py b/examples/examples_image-classification/run_image_classification.py new file mode 100644 index 0000000..cc351cf --- /dev/null +++ b/examples/examples_image-classification/run_image_classification.py @@ -0,0 +1,392 @@ +#!/usr/bin/env python +# coding=utf-8 +# Copyright 2021 The HuggingFace Inc. team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and + +import logging +import os +import sys +from dataclasses import dataclass, field +from typing import Optional + +import datasets +import numpy as np +import torch +from datasets import load_dataset +from PIL import Image +from torchvision.transforms import ( + CenterCrop, + Compose, + Normalize, + RandomHorizontalFlip, + RandomResizedCrop, + Resize, + ToTensor, +) + +import transformers +from transformers import ( + MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING, + AutoConfig, + AutoFeatureExtractor, + AutoModelForImageClassification, + HfArgumentParser, + Trainer, + TrainingArguments, +) +from transformers.trainer_utils import get_last_checkpoint +from transformers.utils import check_min_version +from transformers.utils.versions import require_version + + +""" Fine-tuning a 🤗 Transformers model for image classification""" + +logger = logging.getLogger(__name__) + +# Will error if the minimal version of Transformers is not installed. Remove at your own risks. 
+check_min_version("4.16.0.dev0") + +require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/image-classification/requirements.txt") + +MODEL_CONFIG_CLASSES = list(MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING.keys()) +MODEL_TYPES = tuple(conf.model_type for conf in MODEL_CONFIG_CLASSES) + + +def pil_loader(path: str): + with open(path, "rb") as f: + im = Image.open(f) + return im.convert("RGB") + + +@dataclass +class DataTrainingArguments: + """ + Arguments pertaining to what data we are going to input our model for training and eval. + Using ``HfArgumentParser`` we can turn this class + into argparse arguments to be able to specify them on + the command line. + """ + + dataset_name: Optional[str] = field( + default="nateraw/image-folder", metadata={"help": "Name of a dataset from the datasets package"} + ) + dataset_config_name: Optional[str] = field( + default=None, metadata={"help": "The configuration name of the dataset to use (via the datasets library)."} + ) + train_dir: Optional[str] = field(default=None, metadata={"help": "A folder containing the training data."}) + validation_dir: Optional[str] = field(default=None, metadata={"help": "A folder containing the validation data."}) + train_val_split: Optional[float] = field( + default=0.15, metadata={"help": "Percent to split off of train for validation."} + ) + max_train_samples: Optional[int] = field( + default=None, + metadata={ + "help": "For debugging purposes or quicker training, truncate the number of training examples to this " + "value if set." + }, + ) + max_eval_samples: Optional[int] = field( + default=None, + metadata={ + "help": "For debugging purposes or quicker training, truncate the number of evaluation examples to this " + "value if set." + }, + ) + + def __post_init__(self): + data_files = dict() + if self.train_dir is not None: + data_files["train"] = self.train_dir + if self.validation_dir is not None: + data_files["val"] = self.validation_dir + self.data_files = data_files if data_files else None + +class RemainArgHfArgumentParser(HfArgumentParser): + def parse_json_file(self, json_file: str, return_remaining_args=True ): + """ + Alternative helper method that does not use `argparse` at all, instead loading a json file and populating the + dataclass types. + """ + import argparse + import json + from pathlib import Path + import dataclasses + + data = json.loads(Path(json_file).read_text()) + outputs = [] + for dtype in self.dataclass_types: + keys = {f.name for f in dataclasses.fields(dtype) if f.init} + inputs = {k: data.pop(k) for k in list(data.keys()) if k in keys} + obj = dtype(**inputs) + outputs.append(obj) + + remain_args = argparse.ArgumentParser() + remain_args.__dict__.update(data) + if return_remaining_args: + return (*outputs, remain_args) + else: + return (*outputs,) + +@dataclass +class ModelArguments: + """ + Arguments pertaining to which model/config/tokenizer we are going to fine-tune from. 
+ """ + + model_name_or_path: str = field( + default="google/vit-base-patch16-224-in21k", + metadata={"help": "Path to pretrained model or model identifier from huggingface.co/models"}, + ) + model_type: Optional[str] = field( + default=None, + metadata={"help": "If training from scratch, pass a model type from the list: " + ", ".join(MODEL_TYPES)}, + ) + config_name: Optional[str] = field( + default=None, metadata={"help": "Pretrained config name or path if not the same as model_name"} + ) + cache_dir: Optional[str] = field( + default=None, metadata={"help": "Where do you want to store the pretrained models downloaded from s3"} + ) + model_revision: str = field( + default="main", + metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."}, + ) + feature_extractor_name: str = field(default=None, metadata={"help": "Name or path of preprocessor config."}) + use_auth_token: bool = field( + default=False, + metadata={ + "help": "Will use the token generated when running `transformers-cli login` (necessary to use this script " + "with private models)." + }, + ) + + +def collate_fn(examples): + pixel_values = torch.stack([example["pixel_values"] for example in examples]) + labels = torch.tensor([example["labels"] for example in examples]) + return {"pixel_values": pixel_values, "labels": labels} + + +def main(): + # See all possible arguments in src/transformers/training_args.py + # or by passing the --help flag to this script. + # We now keep distinct sets of args, for a cleaner separation of concerns. + + parser = RemainArgHfArgumentParser((ModelArguments, DataTrainingArguments, TrainingArguments)) + if len(sys.argv) == 2 and sys.argv[1].endswith(".json"): + # If we pass only one argument to the script and it's the path to a json file, + # let's parse it to get our arguments. + model_args, data_args, training_args, delta_args = parser.parse_json_file(json_file=os.path.abspath(sys.argv[1])) + else: + model_args, data_args, training_args, delta_args = parser.parse_args_into_dataclasses() + + # Setup logging + logging.basicConfig( + format="%(asctime)s - %(levelname)s - %(name)s - %(message)s", + datefmt="%m/%d/%Y %H:%M:%S", + handlers=[logging.StreamHandler(sys.stdout)], + ) + + log_level = training_args.get_process_log_level() + logger.setLevel(log_level) + transformers.utils.logging.set_verbosity(log_level) + transformers.utils.logging.enable_default_handler() + transformers.utils.logging.enable_explicit_format() + + # Log on each process the small summary: + logger.warning( + f"Process rank: {training_args.local_rank}, device: {training_args.device}, n_gpu: {training_args.n_gpu}" + + f"distributed training: {bool(training_args.local_rank != -1)}, 16-bits training: {training_args.fp16}" + ) + logger.info(f"Training/evaluation parameters {training_args}") + + # Detecting last checkpoint. + last_checkpoint = None + if os.path.isdir(training_args.output_dir) and training_args.do_train and not training_args.overwrite_output_dir: + last_checkpoint = get_last_checkpoint(training_args.output_dir) + if last_checkpoint is None and len(os.listdir(training_args.output_dir)) > 0: + raise ValueError( + f"Output directory ({training_args.output_dir}) already exists and is not empty. " + "Use --overwrite_output_dir to overcome." + ) + elif last_checkpoint is not None and training_args.resume_from_checkpoint is None: + logger.info( + f"Checkpoint detected, resuming training at {last_checkpoint}. 
To avoid this behavior, change " + "the `--output_dir` or add `--overwrite_output_dir` to train from scratch." + ) + + # Initialize our dataset and prepare it for the 'image-classification' task. + ds = load_dataset( + data_args.dataset_name, + data_args.dataset_config_name, + data_files=data_args.data_files, + cache_dir=model_args.cache_dir, + task="image-classification", + ) + # If you encounter error here, try to down load the dataset by yourself and load from disk + # like the following two lines + # from datasets import load_from_disk + # ds = load_from_disk(f"../../../../huggingface_datasets/saved_to_disk/{data_args.dataset_name}") + + # If we don't have a validation split, split off a percentage of train as validation. + data_args.train_val_split = None if "validation" in ds.keys() else data_args.train_val_split + if isinstance(data_args.train_val_split, float) and data_args.train_val_split > 0.0: + split = ds["train"].train_test_split(data_args.train_val_split) + ds["train"] = split["train"] + ds["validation"] = split["test"] + + # Prepare label mappings. + # We'll include these in the model's config to get human readable labels in the Inference API. + labels = ds["train"].features["labels"].names + label2id, id2label = dict(), dict() + for i, label in enumerate(labels): + label2id[label] = str(i) + id2label[str(i)] = label + + # Load the accuracy metric from the datasets package + # metric = datasets.load_metric("accuracy") + metric = datasets.load_metric("metric.py") + + # Define our compute_metrics function. It takes an ``EvalPrediction`` object (a namedtuple with a + # predictions and label_ids field) and has to return a dictionary string to float. + def compute_metrics(p): + """Computes accuracy on a batch of predictions""" + return metric.compute(predictions=np.argmax(p.predictions, axis=1), references=p.label_ids) + + config = AutoConfig.from_pretrained( + model_args.config_name or model_args.model_name_or_path, + num_labels=len(labels), + label2id=label2id, + id2label=id2label, + finetuning_task="image-classification", + cache_dir=model_args.cache_dir, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + ) + model = AutoModelForImageClassification.from_pretrained( + model_args.model_name_or_path, + from_tf=bool(".ckpt" in model_args.model_name_or_path), + config=config, + cache_dir=model_args.cache_dir, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + ) + feature_extractor = AutoFeatureExtractor.from_pretrained( + model_args.feature_extractor_name or model_args.model_name_or_path, + cache_dir=model_args.cache_dir, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + ) + + + if delta_args.delta_type.lower() != "none": + from opendelta import AutoDeltaConfig,AutoDeltaModel + delta_config = AutoDeltaConfig.from_dict(vars(delta_args)) + delta_model = AutoDeltaModel.from_config(delta_config, backbone_model=model) + delta_model.freeze_module(set_state_dict = True) + delta_model.log(delta_ratio=True, trainable_ratio=True, visualization=True) + + # Define torchvision transforms to be applied to each image. 
+ normalize = Normalize(mean=feature_extractor.image_mean, std=feature_extractor.image_std) + _train_transforms = Compose( + [ + RandomResizedCrop(feature_extractor.size), + RandomHorizontalFlip(), + ToTensor(), + normalize, + ] + ) + _val_transforms = Compose( + [ + Resize(feature_extractor.size), + CenterCrop(feature_extractor.size), + ToTensor(), + normalize, + ] + ) + + def train_transforms(example_batch): + """Apply _train_transforms across a batch.""" + example_batch["pixel_values"] = [ + _train_transforms(pil_img.convert("RGB")) for pil_img in example_batch["image"] + ] + return example_batch + + def val_transforms(example_batch): + """Apply _val_transforms across a batch.""" + example_batch["pixel_values"] = [_val_transforms(pil_img.convert("RGB")) for pil_img in example_batch["image"]] + return example_batch + + if training_args.do_train: + if "train" not in ds: + raise ValueError("--do_train requires a train dataset") + if data_args.max_train_samples is not None: + ds["train"] = ds["train"].shuffle(seed=training_args.seed).select(range(data_args.max_train_samples)) + # Set the training transforms + ds["train"].set_transform(train_transforms) + + if training_args.do_eval: + if "validation" not in ds: + raise ValueError("--do_eval requires a validation dataset") + if data_args.max_eval_samples is not None: + ds["validation"] = ( + ds["validation"].shuffle(seed=training_args.seed).select(range(data_args.max_eval_samples)) + ) + # Set the validation transforms + ds["validation"].set_transform(val_transforms) + + # Initalize our trainer + trainer = Trainer( + model=model, + args=training_args, + train_dataset=ds["train"] if training_args.do_train else None, + eval_dataset=ds["validation"] if training_args.do_eval else None, + compute_metrics=compute_metrics, + tokenizer=feature_extractor, + data_collator=collate_fn, + ) + + # Training + if training_args.do_train: + checkpoint = None + if training_args.resume_from_checkpoint is not None: + checkpoint = training_args.resume_from_checkpoint + elif last_checkpoint is not None: + checkpoint = last_checkpoint + train_result = trainer.train(resume_from_checkpoint=checkpoint) + trainer.save_model() + trainer.log_metrics("train", train_result.metrics) + trainer.save_metrics("train", train_result.metrics) + trainer.save_state() + + # Evaluation + if training_args.do_eval: + metrics = trainer.evaluate() + trainer.log_metrics("eval", metrics) + trainer.save_metrics("eval", metrics) + + # Write model card and (optionally) push to hub + kwargs = { + "finetuned_from": model_args.model_name_or_path, + "tasks": "image-classification", + "dataset": data_args.dataset_name, + "tags": ["image-classification"], + } + if training_args.push_to_hub: + trainer.push_to_hub(**kwargs) + else: + trainer.create_model_card(**kwargs) + + +if __name__ == "__main__": + main() diff --git a/examples/examples_seq2seq/README.md b/examples/examples_seq2seq/README.md new file mode 100644 index 0000000..38c5b22 --- /dev/null +++ b/examples/examples_seq2seq/README.md @@ -0,0 +1,64 @@ +# Appling OpenDelta to GLUE/SuperGLUE tasks using Seq2Seq Paradigm + + +## install the repo +```bash +cd ../ +python setup_seq2seq.py develop +``` +This will add `examples_seq2seq` to the environment path of the python lib. + +## Generating the json configuration file + +``` +python config_gen.py --job $job_name + +``` +The available job configuration (e.g., `--job lora_t5-base`) can be seen from `config_gen.py`. You can also +create your only configuration. 
+ + +## Run the code + +``` +python run_seq2seq.py configs/$job_name/$dataset.json +``` + +## Possible Errors + +1. +``` +ValueError: You must login to the Hugging Face hub on this computer by typing `transformers-cli login` and entering your credentials to use `use_auth_token=Tr +ue`. Alternatively, you can pass your own token as the `use_auth_token` argument. +``` +- Solution 1: Please register an account on [HuggingFace](https://huggingface.co/) +Then run transformers-cli login on your command line to enter the username and password. + +- Solution 2: Disable push_to_hub by modifying in the config.json : "push_to_hub": False + +2. +``` +OSError: Looks like you do not have git-lfs installed, please install. You can install from https://git-lfs.github.com/. Then run `git lfs install` (you only have to do this once). +``` + +- Solution 1: +``` +wget -P ~ https://github.com/git-lfs/git-lfs/releases/download/v3.0.2/git-lfs-linux-amd64-v3.0.2.tar.gz +cd ~ +tar -xvzf git-lfs-linux-amd64-v3.0.2.tar.gz +export PATH=~:$PATH +git-lfs install +``` + +- Solution 2: Disable push_to_hub by modifying in the config.json : "push_to_hub": False + + +3. dataset connection error + +Solution 1: open a python console, running the error command again, may not be useful + +Solution 2: download the dataset by yourself on a internect connected machine, saved to disk and transfer to your server, at last load_from_disk. + + +## Link to the original training scripts +This example repo is based on the [compacter training scripts](https://github.com/rabeehk/compacter), with compacter-related lines removed. Thanks to the authors of the original repo. In addition, in private correspondence with the authors, they shared the codes to create the json configs. Thanks again for their efforts. 
diff --git a/examples/examples_seq2seq/__init__.py b/examples/examples_seq2seq/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/examples/examples_seq2seq/collect_result.jsonl b/examples/examples_seq2seq/collect_result.jsonl new file mode 100644 index 0000000..3ab2a93 --- /dev/null +++ b/examples/examples_seq2seq/collect_result.jsonl @@ -0,0 +1,21 @@ +# the final results will be populated here.{ + "evaluate": { + "epoch": 20.0, + "eval_accuracy": 89.2156862745098, + "eval_average_metrics": 90.76168929110105, + "eval_f1": 92.3076923076923, + "eval_loss": 0.16493959724903107, + "eval_runtime": 1.6391, + "eval_samples_per_second": 124.455 + }, + "repo_name": "DeltaHub/bitfit_t5-base_mrpc", + "test": { + "epoch": 20.0, + "test_accuracy": 88.23529411764706, + "test_average_metrics": 89.97971602434077, + "test_f1": 91.72413793103448, + "test_loss": 0.14968213438987732, + "test_runtime": 1.6344, + "test_samples_per_second": 124.82 + } +} diff --git a/examples/examples_seq2seq/configs/bitfit_t5-base/cola.json b/examples/examples_seq2seq/configs/bitfit_t5-base/cola.json new file mode 100644 index 0000000..8ce796a --- /dev/null +++ b/examples/examples_seq2seq/configs/bitfit_t5-base/cola.json @@ -0,0 +1,40 @@ +{ + "dataset_config_name": [ + "en" + ], + "delta_type": "bitfit", + "do_eval": true, + "do_test": true, + "do_train": true, + "eval_dataset_config_name": [ + "en" + ], + "eval_dataset_name": "cola", + "eval_steps": 100, + "evaluation_strategy": "steps", + "greater_is_better": true, + "learning_rate": 0.0003, + "load_best_model_at_end": true, + "max_source_length": 128, + "metric_for_best_model": "average_metrics", + "model_name_or_path": "t5-base", + "num_train_epochs": 20, + "output_dir": "outputs/bitfit/t5-base/cola", + "overwrite_output_dir": true, + "per_device_eval_batch_size": 32, + "per_device_train_batch_size": 32, + "predict_with_generate": true, + "push_to_hub": true, + "save_steps": 100, + "save_strategy": "steps", + "save_total_limit": 1, + "seed": 42, + "split_validation_test": true, + "task_name": "cola", + "test_dataset_config_name": [ + "en" + ], + "test_dataset_name": "cola", + "tokenizer_name": "t5-base", + "warmup_steps": 0 +} \ No newline at end of file diff --git a/examples/examples_seq2seq/configs/bitfit_t5-base/mnli.json b/examples/examples_seq2seq/configs/bitfit_t5-base/mnli.json new file mode 100644 index 0000000..20baa39 --- /dev/null +++ b/examples/examples_seq2seq/configs/bitfit_t5-base/mnli.json @@ -0,0 +1,40 @@ +{ + "dataset_config_name": [ + "en" + ], + "delta_type": "bitfit", + "do_eval": true, + "do_test": true, + "do_train": true, + "eval_dataset_config_name": [ + "en" + ], + "eval_dataset_name": "mnli", + "eval_steps": 200, + "evaluation_strategy": "steps", + "greater_is_better": true, + "learning_rate": 0.0003, + "load_best_model_at_end": true, + "max_source_length": 128, + "metric_for_best_model": "average_metrics", + "model_name_or_path": "t5-base", + "num_train_epochs": 3, + "output_dir": "outputs/bitfit/t5-base/mnli", + "overwrite_output_dir": true, + "per_device_eval_batch_size": 32, + "per_device_train_batch_size": 32, + "predict_with_generate": true, + "push_to_hub": true, + "save_steps": 200, + "save_strategy": "steps", + "save_total_limit": 1, + "seed": 42, + "split_validation_test": true, + "task_name": "mnli", + "test_dataset_config_name": [ + "en" + ], + "test_dataset_name": "mnli", + "tokenizer_name": "t5-base", + "warmup_steps": 0 +} \ No newline at end of file diff --git 
a/examples/examples_seq2seq/configs/bitfit_t5-base/mrpc.json b/examples/examples_seq2seq/configs/bitfit_t5-base/mrpc.json new file mode 100644 index 0000000..62ce057 --- /dev/null +++ b/examples/examples_seq2seq/configs/bitfit_t5-base/mrpc.json @@ -0,0 +1,40 @@ +{ + "dataset_config_name": [ + "en" + ], + "delta_type": "bitfit", + "do_eval": true, + "do_test": true, + "do_train": true, + "eval_dataset_config_name": [ + "en" + ], + "eval_dataset_name": "mrpc", + "eval_steps": 200, + "evaluation_strategy": "steps", + "greater_is_better": true, + "learning_rate": 0.0003, + "load_best_model_at_end": true, + "max_source_length": 128, + "metric_for_best_model": "average_metrics", + "model_name_or_path": "t5-base", + "num_train_epochs": 20, + "output_dir": "outputs/bitfit/t5-base/mrpc", + "overwrite_output_dir": true, + "per_device_eval_batch_size": 32, + "per_device_train_batch_size": 32, + "predict_with_generate": true, + "push_to_hub": true, + "save_steps": 200, + "save_strategy": "steps", + "save_total_limit": 1, + "seed": 42, + "split_validation_test": true, + "task_name": "mrpc", + "test_dataset_config_name": [ + "en" + ], + "test_dataset_name": "mrpc", + "tokenizer_name": "t5-base", + "warmup_steps": 0 +} \ No newline at end of file diff --git a/examples/examples_seq2seq/configs/bitfit_t5-base/qnli.json b/examples/examples_seq2seq/configs/bitfit_t5-base/qnli.json new file mode 100644 index 0000000..13775a2 --- /dev/null +++ b/examples/examples_seq2seq/configs/bitfit_t5-base/qnli.json @@ -0,0 +1,40 @@ +{ + "dataset_config_name": [ + "en" + ], + "delta_type": "bitfit", + "do_eval": true, + "do_test": true, + "do_train": true, + "eval_dataset_config_name": [ + "en" + ], + "eval_dataset_name": "qnli", + "eval_steps": 200, + "evaluation_strategy": "steps", + "greater_is_better": true, + "learning_rate": 0.0003, + "load_best_model_at_end": true, + "max_source_length": 128, + "metric_for_best_model": "average_metrics", + "model_name_or_path": "t5-base", + "num_train_epochs": 3, + "output_dir": "outputs/bitfit/t5-base/qnli", + "overwrite_output_dir": true, + "per_device_eval_batch_size": 32, + "per_device_train_batch_size": 32, + "predict_with_generate": true, + "push_to_hub": true, + "save_steps": 200, + "save_strategy": "steps", + "save_total_limit": 1, + "seed": 42, + "split_validation_test": true, + "task_name": "qnli", + "test_dataset_config_name": [ + "en" + ], + "test_dataset_name": "qnli", + "tokenizer_name": "t5-base", + "warmup_steps": 0 +} \ No newline at end of file diff --git a/examples/examples_seq2seq/configs/bitfit_t5-base/qqp.json b/examples/examples_seq2seq/configs/bitfit_t5-base/qqp.json new file mode 100644 index 0000000..ed48b69 --- /dev/null +++ b/examples/examples_seq2seq/configs/bitfit_t5-base/qqp.json @@ -0,0 +1,40 @@ +{ + "dataset_config_name": [ + "en" + ], + "delta_type": "bitfit", + "do_eval": true, + "do_test": true, + "do_train": true, + "eval_dataset_config_name": [ + "en" + ], + "eval_dataset_name": "qqp", + "eval_steps": 200, + "evaluation_strategy": "steps", + "greater_is_better": true, + "learning_rate": 0.0003, + "load_best_model_at_end": true, + "max_source_length": 128, + "metric_for_best_model": "average_metrics", + "model_name_or_path": "t5-base", + "num_train_epochs": 3, + "output_dir": "outputs/bitfit/t5-base/qqp", + "overwrite_output_dir": true, + "per_device_eval_batch_size": 32, + "per_device_train_batch_size": 32, + "predict_with_generate": true, + "push_to_hub": true, + "save_steps": 200, + "save_strategy": "steps", + "save_total_limit": 1, + "seed": 
42, + "split_validation_test": true, + "task_name": "qqp", + "test_dataset_config_name": [ + "en" + ], + "test_dataset_name": "qqp", + "tokenizer_name": "t5-base", + "warmup_steps": 0 +} \ No newline at end of file diff --git a/examples/examples_seq2seq/configs/bitfit_t5-base/rte.json b/examples/examples_seq2seq/configs/bitfit_t5-base/rte.json new file mode 100644 index 0000000..5f25109 --- /dev/null +++ b/examples/examples_seq2seq/configs/bitfit_t5-base/rte.json @@ -0,0 +1,40 @@ +{ + "dataset_config_name": [ + "en" + ], + "delta_type": "bitfit", + "do_eval": true, + "do_test": true, + "do_train": true, + "eval_dataset_config_name": [ + "en" + ], + "eval_dataset_name": "rte", + "eval_steps": 100, + "evaluation_strategy": "steps", + "greater_is_better": true, + "learning_rate": 0.0003, + "load_best_model_at_end": true, + "max_source_length": 128, + "metric_for_best_model": "average_metrics", + "model_name_or_path": "t5-base", + "num_train_epochs": 20, + "output_dir": "outputs/bitfit/t5-base/rte", + "overwrite_output_dir": true, + "per_device_eval_batch_size": 32, + "per_device_train_batch_size": 32, + "predict_with_generate": true, + "push_to_hub": true, + "save_steps": 100, + "save_strategy": "steps", + "save_total_limit": 1, + "seed": 42, + "split_validation_test": true, + "task_name": "rte", + "test_dataset_config_name": [ + "en" + ], + "test_dataset_name": "rte", + "tokenizer_name": "t5-base", + "warmup_steps": 0 +} \ No newline at end of file diff --git a/examples/examples_seq2seq/configs/bitfit_t5-base/sst2.json b/examples/examples_seq2seq/configs/bitfit_t5-base/sst2.json new file mode 100644 index 0000000..8319ed0 --- /dev/null +++ b/examples/examples_seq2seq/configs/bitfit_t5-base/sst2.json @@ -0,0 +1,40 @@ +{ + "dataset_config_name": [ + "en" + ], + "delta_type": "bitfit", + "do_eval": true, + "do_test": true, + "do_train": true, + "eval_dataset_config_name": [ + "en" + ], + "eval_dataset_name": "sst2", + "eval_steps": 200, + "evaluation_strategy": "steps", + "greater_is_better": true, + "learning_rate": 0.0003, + "load_best_model_at_end": true, + "max_source_length": 128, + "metric_for_best_model": "average_metrics", + "model_name_or_path": "t5-base", + "num_train_epochs": 3, + "output_dir": "outputs/bitfit/t5-base/sst2", + "overwrite_output_dir": true, + "per_device_eval_batch_size": 32, + "per_device_train_batch_size": 32, + "predict_with_generate": true, + "push_to_hub": true, + "save_steps": 200, + "save_strategy": "steps", + "save_total_limit": 1, + "seed": 42, + "split_validation_test": true, + "task_name": "sst2", + "test_dataset_config_name": [ + "en" + ], + "test_dataset_name": "sst2", + "tokenizer_name": "t5-base", + "warmup_steps": 0 +} \ No newline at end of file diff --git a/examples/examples_seq2seq/configs/bitfit_t5-base/stsb.json b/examples/examples_seq2seq/configs/bitfit_t5-base/stsb.json new file mode 100644 index 0000000..f65a369 --- /dev/null +++ b/examples/examples_seq2seq/configs/bitfit_t5-base/stsb.json @@ -0,0 +1,40 @@ +{ + "dataset_config_name": [ + "en" + ], + "delta_type": "bitfit", + "do_eval": true, + "do_test": true, + "do_train": true, + "eval_dataset_config_name": [ + "en" + ], + "eval_dataset_name": "stsb", + "eval_steps": 100, + "evaluation_strategy": "steps", + "greater_is_better": true, + "learning_rate": 0.0003, + "load_best_model_at_end": true, + "max_source_length": 128, + "metric_for_best_model": "average_metrics", + "model_name_or_path": "t5-base", + "num_train_epochs": 20, + "output_dir": "outputs/bitfit/t5-base/stsb", + 
"overwrite_output_dir": true, + "per_device_eval_batch_size": 32, + "per_device_train_batch_size": 32, + "predict_with_generate": true, + "push_to_hub": true, + "save_steps": 100, + "save_strategy": "steps", + "save_total_limit": 1, + "seed": 42, + "split_validation_test": true, + "task_name": "stsb", + "test_dataset_config_name": [ + "en" + ], + "test_dataset_name": "stsb", + "tokenizer_name": "t5-base", + "warmup_steps": 0 +} \ No newline at end of file diff --git a/examples/examples_seq2seq/configs/bitfit_t5-base/superglue-boolq.json b/examples/examples_seq2seq/configs/bitfit_t5-base/superglue-boolq.json new file mode 100644 index 0000000..b733416 --- /dev/null +++ b/examples/examples_seq2seq/configs/bitfit_t5-base/superglue-boolq.json @@ -0,0 +1,40 @@ +{ + "dataset_config_name": [ + "en" + ], + "delta_type": "bitfit", + "do_eval": true, + "do_test": true, + "do_train": true, + "eval_dataset_config_name": [ + "en" + ], + "eval_dataset_name": "superglue-boolq", + "eval_steps": 200, + "evaluation_strategy": "steps", + "greater_is_better": true, + "learning_rate": 0.0003, + "load_best_model_at_end": true, + "max_source_length": 256, + "metric_for_best_model": "average_metrics", + "model_name_or_path": "t5-base", + "num_train_epochs": 20, + "output_dir": "outputs/bitfit/t5-base/superglue-boolq", + "overwrite_output_dir": true, + "per_device_eval_batch_size": 32, + "per_device_train_batch_size": 32, + "predict_with_generate": true, + "push_to_hub": true, + "save_steps": 200, + "save_strategy": "steps", + "save_total_limit": 1, + "seed": 42, + "split_validation_test": true, + "task_name": "superglue-boolq", + "test_dataset_config_name": [ + "en" + ], + "test_dataset_name": "superglue-boolq", + "tokenizer_name": "t5-base", + "warmup_steps": 0 +} \ No newline at end of file diff --git a/examples/examples_seq2seq/configs/bitfit_t5-base/superglue-cb.json b/examples/examples_seq2seq/configs/bitfit_t5-base/superglue-cb.json new file mode 100644 index 0000000..a801550 --- /dev/null +++ b/examples/examples_seq2seq/configs/bitfit_t5-base/superglue-cb.json @@ -0,0 +1,40 @@ +{ + "dataset_config_name": [ + "en" + ], + "delta_type": "bitfit", + "do_eval": true, + "do_test": true, + "do_train": true, + "eval_dataset_config_name": [ + "en" + ], + "eval_dataset_name": "superglue-cb", + "eval_steps": 100, + "evaluation_strategy": "steps", + "greater_is_better": true, + "learning_rate": 0.0003, + "load_best_model_at_end": true, + "max_source_length": 256, + "metric_for_best_model": "average_metrics", + "model_name_or_path": "t5-base", + "num_train_epochs": 20, + "output_dir": "outputs/bitfit/t5-base/superglue-cb", + "overwrite_output_dir": true, + "per_device_eval_batch_size": 32, + "per_device_train_batch_size": 32, + "predict_with_generate": true, + "push_to_hub": true, + "save_steps": 100, + "save_strategy": "steps", + "save_total_limit": 1, + "seed": 42, + "split_validation_test": true, + "task_name": "superglue-cb", + "test_dataset_config_name": [ + "en" + ], + "test_dataset_name": "superglue-cb", + "tokenizer_name": "t5-base", + "warmup_steps": 0 +} \ No newline at end of file diff --git a/examples/examples_seq2seq/configs/bitfit_t5-base/superglue-copa.json b/examples/examples_seq2seq/configs/bitfit_t5-base/superglue-copa.json new file mode 100644 index 0000000..c69b62d --- /dev/null +++ b/examples/examples_seq2seq/configs/bitfit_t5-base/superglue-copa.json @@ -0,0 +1,40 @@ +{ + "dataset_config_name": [ + "en" + ], + "delta_type": "bitfit", + "do_eval": true, + "do_test": true, + "do_train": true, + 
"eval_dataset_config_name": [ + "en" + ], + "eval_dataset_name": "superglue-copa", + "eval_steps": 50, + "evaluation_strategy": "steps", + "greater_is_better": true, + "learning_rate": 0.0003, + "load_best_model_at_end": true, + "max_source_length": 256, + "metric_for_best_model": "average_metrics", + "model_name_or_path": "t5-base", + "num_train_epochs": 40, + "output_dir": "outputs/bitfit/t5-base/superglue-copa", + "overwrite_output_dir": true, + "per_device_eval_batch_size": 32, + "per_device_train_batch_size": 32, + "predict_with_generate": true, + "push_to_hub": true, + "save_steps": 50, + "save_strategy": "steps", + "save_total_limit": 1, + "seed": 42, + "split_validation_test": true, + "task_name": "superglue-copa", + "test_dataset_config_name": [ + "en" + ], + "test_dataset_name": "superglue-copa", + "tokenizer_name": "t5-base", + "warmup_steps": 0 +} \ No newline at end of file diff --git a/examples/examples_seq2seq/configs/bitfit_t5-base/superglue-multirc.json b/examples/examples_seq2seq/configs/bitfit_t5-base/superglue-multirc.json new file mode 100644 index 0000000..fd694c2 --- /dev/null +++ b/examples/examples_seq2seq/configs/bitfit_t5-base/superglue-multirc.json @@ -0,0 +1,40 @@ +{ + "dataset_config_name": [ + "en" + ], + "delta_type": "bitfit", + "do_eval": true, + "do_test": true, + "do_train": true, + "eval_dataset_config_name": [ + "en" + ], + "eval_dataset_name": "superglue-multirc", + "eval_steps": 200, + "evaluation_strategy": "steps", + "greater_is_better": true, + "learning_rate": 0.0003, + "load_best_model_at_end": true, + "max_source_length": 256, + "metric_for_best_model": "average_metrics", + "model_name_or_path": "t5-base", + "num_train_epochs": 3, + "output_dir": "outputs/bitfit/t5-base/superglue-multirc", + "overwrite_output_dir": true, + "per_device_eval_batch_size": 32, + "per_device_train_batch_size": 32, + "predict_with_generate": true, + "push_to_hub": true, + "save_steps": 200, + "save_strategy": "steps", + "save_total_limit": 1, + "seed": 42, + "split_validation_test": true, + "task_name": "superglue-multirc", + "test_dataset_config_name": [ + "en" + ], + "test_dataset_name": "superglue-multirc", + "tokenizer_name": "t5-base", + "warmup_steps": 0 +} \ No newline at end of file diff --git a/examples/examples_seq2seq/configs/bitfit_t5-base/superglue-record.json b/examples/examples_seq2seq/configs/bitfit_t5-base/superglue-record.json new file mode 100644 index 0000000..b9f79c5 --- /dev/null +++ b/examples/examples_seq2seq/configs/bitfit_t5-base/superglue-record.json @@ -0,0 +1,40 @@ +{ + "dataset_config_name": [ + "en" + ], + "delta_type": "bitfit", + "do_eval": true, + "do_test": true, + "do_train": true, + "eval_dataset_config_name": [ + "en" + ], + "eval_dataset_name": "superglue-record", + "eval_steps": 200, + "evaluation_strategy": "steps", + "greater_is_better": true, + "learning_rate": 0.0003, + "load_best_model_at_end": true, + "max_source_length": 512, + "metric_for_best_model": "average_metrics", + "model_name_or_path": "t5-base", + "num_train_epochs": 3, + "output_dir": "outputs/bitfit/t5-base/superglue-record", + "overwrite_output_dir": true, + "per_device_eval_batch_size": 16, + "per_device_train_batch_size": 16, + "predict_with_generate": true, + "push_to_hub": true, + "save_steps": 200, + "save_strategy": "steps", + "save_total_limit": 1, + "seed": 42, + "split_validation_test": true, + "task_name": "superglue-record", + "test_dataset_config_name": [ + "en" + ], + "test_dataset_name": "superglue-record", + "tokenizer_name": "t5-base", + 
"warmup_steps": 0 +} \ No newline at end of file diff --git a/examples/examples_seq2seq/configs/bitfit_t5-base/superglue-wic.json b/examples/examples_seq2seq/configs/bitfit_t5-base/superglue-wic.json new file mode 100644 index 0000000..900067f --- /dev/null +++ b/examples/examples_seq2seq/configs/bitfit_t5-base/superglue-wic.json @@ -0,0 +1,40 @@ +{ + "dataset_config_name": [ + "en" + ], + "delta_type": "bitfit", + "do_eval": true, + "do_test": true, + "do_train": true, + "eval_dataset_config_name": [ + "en" + ], + "eval_dataset_name": "superglue-wic", + "eval_steps": 100, + "evaluation_strategy": "steps", + "greater_is_better": true, + "learning_rate": 0.0003, + "load_best_model_at_end": true, + "max_source_length": 256, + "metric_for_best_model": "average_metrics", + "model_name_or_path": "t5-base", + "num_train_epochs": 20, + "output_dir": "outputs/bitfit/t5-base/superglue-wic", + "overwrite_output_dir": true, + "per_device_eval_batch_size": 32, + "per_device_train_batch_size": 32, + "predict_with_generate": true, + "push_to_hub": true, + "save_steps": 100, + "save_strategy": "steps", + "save_total_limit": 1, + "seed": 42, + "split_validation_test": true, + "task_name": "superglue-wic", + "test_dataset_config_name": [ + "en" + ], + "test_dataset_name": "superglue-wic", + "tokenizer_name": "t5-base", + "warmup_steps": 0 +} \ No newline at end of file diff --git a/examples/examples_seq2seq/configs/bitfit_t5-base/superglue-wsc.fixed.json b/examples/examples_seq2seq/configs/bitfit_t5-base/superglue-wsc.fixed.json new file mode 100644 index 0000000..d6a7b64 --- /dev/null +++ b/examples/examples_seq2seq/configs/bitfit_t5-base/superglue-wsc.fixed.json @@ -0,0 +1,40 @@ +{ + "dataset_config_name": [ + "en" + ], + "delta_type": "bitfit", + "do_eval": true, + "do_test": true, + "do_train": true, + "eval_dataset_config_name": [ + "en" + ], + "eval_dataset_name": "superglue-wsc.fixed", + "eval_steps": 100, + "evaluation_strategy": "steps", + "greater_is_better": true, + "learning_rate": 0.0003, + "load_best_model_at_end": true, + "max_source_length": 256, + "metric_for_best_model": "average_metrics", + "model_name_or_path": "t5-base", + "num_train_epochs": 20, + "output_dir": "outputs/bitfit/t5-base/superglue-wsc.fixed", + "overwrite_output_dir": true, + "per_device_eval_batch_size": 32, + "per_device_train_batch_size": 32, + "predict_with_generate": true, + "push_to_hub": true, + "save_steps": 100, + "save_strategy": "steps", + "save_total_limit": 1, + "seed": 42, + "split_validation_test": true, + "task_name": "superglue-wsc.fixed", + "test_dataset_config_name": [ + "en" + ], + "test_dataset_name": "superglue-wsc.fixed", + "tokenizer_name": "t5-base", + "warmup_steps": 0 +} \ No newline at end of file diff --git a/examples/examples_seq2seq/configs/config_gen.py b/examples/examples_seq2seq/configs/config_gen.py new file mode 100644 index 0000000..073a112 --- /dev/null +++ b/examples/examples_seq2seq/configs/config_gen.py @@ -0,0 +1,230 @@ +import collections +import copy + +AllConfigs = {} + +BaseConfigs = {} +BaseConfigs['t5-base'] = { + ("job_name", "task_name", "eval_dataset_name", "test_dataset_name", "num_train_epochs", + "max_source_length", + "per_device_train_batch_size", "per_device_eval_batch_size", "warmup_steps","save_steps", "eval_steps"): zip( + ["superglue-boolq", "superglue-cb", "superglue-copa", "superglue-wic", "superglue-multirc", "superglue-record", + "superglue-wsc.fixed", "mrpc", "cola", "sst2", "qnli", "rte", "mnli", "qqp", "stsb"], + ["superglue-boolq", "superglue-cb", 
"superglue-copa", "superglue-wic", "superglue-multirc", "superglue-record", "superglue-wsc.fixed", "mrpc", "cola", "sst2", "qnli", "rte", "mnli", "qqp", "stsb"], + ["superglue-boolq", "superglue-cb", "superglue-copa", "superglue-wic", "superglue-multirc", "superglue-record", "superglue-wsc.fixed", "mrpc", "cola", "sst2", "qnli", "rte", "mnli", "qqp", "stsb"], + ["superglue-boolq", "superglue-cb", "superglue-copa", "superglue-wic", "superglue-multirc", "superglue-record", "superglue-wsc.fixed", "mrpc", "cola", "sst2", "qnli", "rte", "mnli", "qqp", "stsb"], + [ 20, 20, 40, 20, 3, 3, 20, 20, 20, 3, 3, 20, 3, 3, 20], + [256, 256, 256, 256, 256, 512, 256, 128, 128, 128, 128, 128, 128, 128, 128], + [ 32, 32, 32, 32, 32, 16, 32] + [32] * 8, + [ 32, 32, 32, 32, 32, 16, 32] + [32] * 8, + [0] *7 +[0] *8, + [200, 100, 50, 100, 200, 200, 100, 200, 100, 200, 200, 100, 200, 200, 100], + [200, 100, 50, 100, 200, 200, 100, 200, 100, 200, 200, 100, 200, 200, 100], + ), + "do_train": True, + "do_eval": True, + "do_test": True, + + "model_name_or_path": "t5-base", + "tokenizer_name": "t5-base", + "save_total_limit": 1, + # For glue datasets. + "split_validation_test": True, + "seed": 42, + "dataset_config_name": ["en"], + "eval_dataset_config_name": ["en"], + "test_dataset_config_name": ["en"], + # other configurations. + "predict_with_generate": True, + # To evaluate during training. + "load_best_model_at_end": True, + "metric_for_best_model": "average_metrics", + "greater_is_better": True, + "evaluation_strategy": "steps", + "overwrite_output_dir": True, + "push_to_hub": True, + "save_strategy": "steps" + } + +AllConfigs['bitfit_t5-base'] = copy.deepcopy(BaseConfigs['t5-base']) +AllConfigs['bitfit_t5-base'].update({ + "delta_type": "bitfit", + "learning_rate": 3e-4, + "output_dir": "outputs/bitfit/t5-base/", + }) + +AllConfigs['adapter_t5-base'] = copy.deepcopy(BaseConfigs['t5-base']) +AllConfigs['adapter_t5-base'].update({ + "delta_type": "adapter", + "learning_rate": 3e-4, + "unfrozen_modules": [ + "deltas", + "layer_norm", + "final_layer_norm" + ], + "bottleneck_dim":24, + "output_dir": "outputs/adapter/t5-base/", + }) + +AllConfigs['lora_t5-base'] = copy.deepcopy(BaseConfigs['t5-base']) +AllConfigs['lora_t5-base'].update({ + "delta_type": "lora", + "learning_rate": 3e-4, + "unfrozen_modules": [ + "deltas", + "layer_norm", + "final_layer_norm" + ], + "lora_r": 8, + "output_dir": "outputs/lora/t5-base/", + }) + +AllConfigs['compacter_t5-base'] = copy.deepcopy(BaseConfigs['t5-base']) +AllConfigs['compacter_t5-base'].update({ + "delta_type": "compacter", + "learning_rate": 3e-3, + "unfrozen_modules": [ + "deltas", + "layer_norm", + "final_layer_norm" + ], + "output_dir": "outputs/compacter/t5-base/", + "non_linearity": "gelu_new", + + #Compacter. 
+ "hypercomplex_division": 4, + "hypercomplex_adapters": True, + "hypercomplex_nonlinearity": "glorot-uniform", + # gradient clip and clamp + "gradient_clip": False, + "phm_clamp": False, + "normalize_phm_weight": False, + "learn_phm": True, + # shared one side + "factorized_phm": True, + "shared_phm_rule": False, + "factorized_phm_rule": False, + "phm_c_init": "normal", + "phm_init_range": 0.0001, + "use_bias_down_sampler": True, + "use_bias_up_sampler": True, + }) + +AllConfigs['compacter++_t5-base'] = copy.deepcopy(BaseConfigs['t5-base']) +AllConfigs['compacter++_t5-base'].update({ + "delta_type": "compacter", + "learning_rate": 3e-3, + "do_train": True, + "do_eval": True, + "do_test": True, + "modified_modules": [ + "DenseReluDense" + ], + "unfrozen_modules": [ + "deltas", + "layer_norm", + "final_layer_norm" + ], + "output_dir": "outputs/compacter++/t5-base/", + "non_linearity": "gelu_new", + + #Compacter. + "hypercomplex_division": 4, + "hypercomplex_adapters": True, + "hypercomplex_nonlinearity": "glorot-uniform", + # gradient clip and clamp + "gradient_clip": False, + "phm_clamp": False, + "normalize_phm_weight": False, + "learn_phm": True, + # shared one side + "factorized_phm": True, + "shared_phm_rule": False, + "factorized_phm_rule": False, + "phm_c_init": "normal", + "phm_init_range": 0.0001, + "use_bias_down_sampler": True, + "use_bias_up_sampler": True, + }) + + +AllConfigs['low_rank_adapter_t5-base'] = copy.deepcopy(BaseConfigs['t5-base']) +AllConfigs['low_rank_adapter_t5-base'].update({ + "delta_type": "low_rank_adapter", + "learning_rate": 3e-4, + "unfrozen_modules": [ + "deltas", + "layer_norm", + "final_layer_norm" + ], + "output_dir": "outputs/low_rank_adapter/t5-base/", + "non_linearity": "gelu_new", + "low_rank_w_init": "glorot-uniform", + "low_rank_rank": 1, + }) + + +AllConfigs['soft_prompt_t5-base'] = copy.deepcopy(BaseConfigs['t5-base']) +AllConfigs['soft_prompt_t5-base'].update({ + "delta_type": "soft_prompt", + "learning_rate": 3e-2, + "soft_token_num":100, + "token_init": False, + "unfrozen_modules": [ + "deltas", + ], + "output_dir": "outputs/soft_prompt/t5-base/", + }) + +AllConfigs['prefix_t5-base'] = copy.deepcopy(BaseConfigs['t5-base']) +AllConfigs['prefix_t5-base'].update({ + "delta_type": "prefix", + "learning_rate": 3e-4, + "unfrozen_modules": [ + "deltas", + ], + "output_dir": "outputs/prefix/t5-base/", + }) + + +if __name__ == "__main__": + import argparse + import json + import os + parser = argparse.ArgumentParser("Parser to generate configuration") + parser.add_argument("--job", type=str) + args = parser.parse_args() + + config = AllConfigs[args.job] + + Cartesian_product = [] + for key in config: + if isinstance(key, tuple): + Cartesian_product.append(key) + all_config_jsons = {} + for key_tuple in Cartesian_product: + for zipped in config[key_tuple]: + job_name = zipped[0] + all_config_jsons[job_name] = {} + for key_name, zipped_elem in zip(key_tuple, zipped): + if key_name != 'job_name': + all_config_jsons[job_name][key_name] = zipped_elem + for key in config: + if not isinstance(key, tuple): + for job_name in all_config_jsons: + if key == "output_dir": + all_config_jsons[job_name][key] = config[key] + job_name + else: + all_config_jsons[job_name][key] = config[key] + + + if not os.path.exists(f"./{args.job}/"): + os.mkdir(f"./{args.job}/") + + for job_name in all_config_jsons: + with open(f"./{args.job}/{job_name}.json", 'w') as fout: + json.dump(all_config_jsons[job_name], fout, indent=4,sort_keys=True) + + + + \ No newline at end of file 
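For reference, the script above writes one JSON file per task under `./<job_name>/`. A small sketch (assuming `python config_gen.py --job bitfit_t5-base` has already been run in this directory) to check what was generated:

```python
# Quick check of the configs emitted by config_gen.py for one job.
import json
from pathlib import Path

for path in sorted(Path("./bitfit_t5-base").glob("*.json")):
    cfg = json.loads(path.read_text())
    print(path.name, cfg["task_name"], cfg["num_train_epochs"], cfg["eval_steps"])
```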
diff --git a/examples/examples_seq2seq/data_processors/__init__.py b/examples/examples_seq2seq/data_processors/__init__.py new file mode 100644 index 0000000..8b9d6bc --- /dev/null +++ b/examples/examples_seq2seq/data_processors/__init__.py @@ -0,0 +1,3 @@ +from .tasks import TASK_MAPPING, AutoTask +from .data_collator import TaskDataCollatorForSeq2Seq +from .postprocessors import AutoPostProcessor diff --git a/examples/examples_seq2seq/data_processors/data_collator.py b/examples/examples_seq2seq/data_processors/data_collator.py new file mode 100644 index 0000000..744a929 --- /dev/null +++ b/examples/examples_seq2seq/data_processors/data_collator.py @@ -0,0 +1,16 @@ +import numpy as np +from dataclasses import dataclass +from transformers import DataCollatorForSeq2Seq + + +@dataclass +class TaskDataCollatorForSeq2Seq(DataCollatorForSeq2Seq): + def check_uniqueness(self, samples): + assert len(np.unique(samples)) == 1 + + def __call__(self, features): + # tasks = [d.pop('task') for d in features] + # self.check_uniqueness(tasks) + output = super().__call__(features) + # output["task"] = tasks[0] + return output \ No newline at end of file diff --git a/examples/examples_seq2seq/data_processors/postprocessors.py b/examples/examples_seq2seq/data_processors/postprocessors.py new file mode 100644 index 0000000..a4155b5 --- /dev/null +++ b/examples/examples_seq2seq/data_processors/postprocessors.py @@ -0,0 +1,64 @@ +import abc +from collections import OrderedDict +import numpy as np + +"""Defines functions to process the outputs to make them ready for the evaluation.""" + +def string_to_float(string, default=-1., **unused_kwargs): + """Converts string to float, using default when conversion not possible.""" + try: + return float(string) + except ValueError: + return default + + +class PostProcessor(abc.ABC): + """Postprocess the predictions and labels to make them suitable for + evaluation.""" + def __init__(self, tokenizer, ignore_pad_token_for_loss): + self.tokenizer = tokenizer + self.ignore_pad_token_for_loss = ignore_pad_token_for_loss + + def process(self, preds, labels, data_info=None): + if isinstance(preds, tuple): + preds = preds[0] + decoded_preds = self.tokenizer.batch_decode(preds, skip_special_tokens=True) + if self.ignore_pad_token_for_loss: + # Replace -100 in the labels as we can't decode them. 
+ labels = np.where(labels != -100, labels, self.tokenizer.pad_token_id) + decoded_labels = self.tokenizer.batch_decode(labels, skip_special_tokens=True) + # Some simple post-processing + decoded_preds = [pred.strip() for pred in decoded_preds] + decoded_labels = [label.strip() for label in decoded_labels] + return decoded_preds, decoded_labels + + +class MultiRC(PostProcessor): + def process(self, preds, labels, data_info): + preds, labels = super().process(preds, labels, data_info) + preds = [{"group": info["group"], "value":pred} \ + for info, pred in zip(data_info, preds)] + labels = [{"group": info["group"], "value": label}\ + for info, label in zip(data_info, labels)] + return preds, labels + +class Record(PostProcessor): + def process(self, preds, labels, data_info): + preds, labels = super().process(preds, labels, data_info) + labels = [info["answers"] for info in data_info] + return preds, labels + + +POSTPROCESSOR_MAPPING = OrderedDict( + [ + ('superglue-record', Record), + ('superglue-multirc', MultiRC) + ] +) + +class AutoPostProcessor: + @classmethod + def get(self, task, tokenizer, ignore_pad_token_for_loss): + if task in POSTPROCESSOR_MAPPING: + return POSTPROCESSOR_MAPPING[task](tokenizer, ignore_pad_token_for_loss) + return PostProcessor(tokenizer, ignore_pad_token_for_loss) diff --git a/examples/examples_seq2seq/data_processors/tasks.py b/examples/examples_seq2seq/data_processors/tasks.py new file mode 100644 index 0000000..a4f8f44 --- /dev/null +++ b/examples/examples_seq2seq/data_processors/tasks.py @@ -0,0 +1,584 @@ +from collections import OrderedDict +import collections +import abc +import functools +from typing import Callable, List, Mapping +from examples_seq2seq.trainers.trainer_utils import pad_punctuation +from examples_seq2seq.metrics import metrics +from .utils import round_stsb_target +import datasets +import logging +import numpy as np +import torch +import re + +logger = logging.getLogger(__name__) + +class AbstractTask(abc.ABC): + name = NotImplemented + config = NotImplemented + prefix = NotImplemented + preprocessor: Callable = NotImplemented + metric = NotImplemented + metric_names = NotImplemented + split_map = None + labels_list = None + split_to_data_split: Mapping[str, str] = \ + {"train": "train", "validation": "validation", "test": "test"} + small_datasets_without_all_splits = ["cola", "wnli", "rte", "superglue-cb", "superglue-copa", "superglue-multirc", + "superglue-wic", "superglue-wsc.fixed", "superglue-rte", "mrpc", "stsb", + "superglue-boolq"] + large_data_without_all_splits = ["qqp", "qnli", "superglue-record", "sst2"] + + def __init__(self, config, seed=42): + self.config = config + self.seed = seed + + def get_max_target_length(self, tokenizer, default_max_length): + if self.labels_list is not None: + return max([len(tokenizer.encode(label)) for label in self.labels_list]) + return default_max_length + + def seq2seq_format(self, sources: List[str], + targets: List[str], + add_prefix: bool=False, + prefix: str=None, + extra_fields={}): + src_prefix = self.name if prefix is None else prefix + sources = [src_prefix]+sources if add_prefix else sources + return {'source': ' '.join(sources), + 'target': ' '.join(targets), + 'task': self.name, + 'extra_fields': extra_fields} + + def check_n_obs(self, n_obs, total_size): + if n_obs is not None and n_obs > total_size: + n_obs = total_size + logger.warning("n_obs is set to %s", n_obs) + return n_obs + + def shuffled_indices(self, dataset): + num_samples = len(dataset) + generator = torch.Generator() 
+ generator.manual_seed(self.seed)
+ return torch.randperm(num_samples, generator=generator).tolist()
+
+ def subsample(self, dataset, n_obs=None, indices=None):
+ """
+ Given a dataset returns the subsampled dataset.
+ :param n_obs: the number of samples of the subsampled dataset.
+ :param indices: indices to select the samples from; if not given, indices are computed
+ by shuffling the given dataset.
+ :return: subsampled dataset.
+ """
+ num_samples = len(dataset)
+ n_obs = self.check_n_obs(n_obs, num_samples)
+ if indices is None:
+ indices = self.shuffled_indices(dataset)
+ indices = indices[:n_obs]
+ return dataset.select(indices)
+
+ def load_dataset(self, split: str):
+ return datasets.load_dataset(self.name, self.config, split=split, script_version="master")
+
+ def get_split_indices(self, split, dataset, validation_size):
+ indices = self.shuffled_indices(dataset)
+ if split == "validation":
+ return indices[:validation_size]
+ else:
+ return indices[validation_size:]
+
+ def map_dataset(self, dataset, add_prefix):
+ return dataset.map(functools.partial(self.preprocessor, add_prefix=add_prefix),
+ remove_columns=dataset.column_names)
+
+ def get(self, split, add_prefix=True, n_obs=None, split_validation_test=False):
+ # For small datasets (n_samples < 10K) without a test set, we divide the validation set in
+ # half, using one half as the test set and the other half as the validation set.
+ if split_validation_test and self.name in self.small_datasets_without_all_splits \
+ and split != "train":
+ mapped_split = self.split_to_data_split["validation"]
+ dataset = self.load_dataset(split=mapped_split)
+ indices = self.get_split_indices(split, dataset, validation_size=len(dataset)//2)
+ dataset = self.subsample(dataset, n_obs, indices)
+ # For larger datasets (n_samples > 10K), we split 1K examples off the training set as the
+ # validation set and use the rest for training, keeping the original validation
+ # set as the test set.
+ elif split_validation_test and self.name in self.large_data_without_all_splits \
+ and split != "test":
+ dataset = self.load_dataset(split="train")
+ indices = self.get_split_indices(split, dataset, validation_size=1000)
+ dataset = self.subsample(dataset, n_obs, indices)
+ else:
+ mapped_split = self.split_to_data_split[split]
+ dataset = self.load_dataset(split=mapped_split)
+ # shuffles the data and samples it.
+ if n_obs is not None: + dataset = self.subsample(dataset, n_obs) + return self.map_dataset(dataset, add_prefix) + +class Squad(AbstractTask): + name = "squad" + metric = [metrics.squad] + + def load_dataset(self, split): + return datasets.load_dataset(self.name, split=split, script_version="master") + + def preprocessor(self, example, add_prefix): + answer = pad_punctuation(example['answers']['text'][0]) + question = pad_punctuation(example['question']) + context = pad_punctuation(example['context']) + source = ["question:", question, + "context:", context] + target = [answer] + return self.seq2seq_format(source, target, add_prefix) + + +class MRPC(AbstractTask): + name = "mrpc" + labels_list = ["0", "1"] + metric = [metrics.f1_score_with_invalid, metrics.accuracy] + metric_names = ["f1", "accuracy"] + split_to_data_split = {"train": "train", + "validation": "validation", + "test": "validation"} + + def load_dataset(self, split): + return datasets.load_dataset('glue', 'mrpc', split=split, script_version="master") + + def preprocessor(self, example, add_prefix=True): + src_texts = ["sentence1:", example['sentence1'], + "sentence2:", example["sentence2"]] + tgt_texts = [str(example['label'])] + return self.seq2seq_format(src_texts, tgt_texts, add_prefix) + + +class COLA(AbstractTask): + name = "cola" + labels_list = ["0", "1"] + metric = [metrics.matthews_corrcoef] + metric_names = ["matthews_correlation"] + split_to_data_split = {"train": "train", + "validation": "validation", + "test": "validation"} + + def load_dataset(self, split): + return datasets.load_dataset('glue', 'cola', + split=split, script_version="master") + + def preprocessor(self, example, add_prefix=True): + src_texts = ["sentence:", example['sentence']] + tgt_texts = [str(example['label'])] + return self.seq2seq_format(src_texts, tgt_texts, add_prefix) + + +class SST2(AbstractTask): + name = "sst2" + labels_list = ["0", "1"] + metric = [metrics.accuracy] + metric_names = ["accuracy"] + split_to_data_split = {"train": "train", + "validation": "validation", + "test": "validation"} + + def load_dataset(self, split): + return datasets.load_dataset('glue', 'sst2', + split=split, script_version="master") + + def preprocessor(self, example, add_prefix=True): + src_texts = ["sentence:", example['sentence']] + tgt_texts = [str(example['label'])] + return self.seq2seq_format(src_texts, tgt_texts, add_prefix) + + +class STSB(AbstractTask): + name = "stsb" + labels_list = [str(np.round(label, decimals=1)) for label in np.arange(0, 5.2, 0.2)] + metric = [metrics.pearson_corrcoef, metrics.spearman_corrcoef] + metric_names = ["pearson", "spearmanr"] + split_to_data_split = {"train": "train", + "validation": "validation", + "test": "validation"} + + def load_dataset(self, split): + return datasets.load_dataset('glue', 'stsb', + split=split, script_version="master") + + def preprocessor(self, example, add_prefix=True): + src_texts = ["sentence1:", example['sentence1'], + "sentence2:", example["sentence2"]] + tgt_texts = [str(round_stsb_target(example['label']))] + return self.seq2seq_format(src_texts, tgt_texts, add_prefix) + + +class QQP(AbstractTask): + name = "qqp" + labels_list = ["0", "1"] + metric = [metrics.f1_score_with_invalid, metrics.accuracy] + metric_names = ["f1", "accuracy"] + split_to_data_split = {"train": "train", + "validation": "validation", + "test": "validation"} + + def load_dataset(self, split): + return datasets.load_dataset('glue', 'qqp', + split=split, script_version="master") + + def preprocessor(self, 
example, add_prefix=True): + src_texts = ["question1:", example['question1'], + "question2:", example["question2"]] + tgt_texts = [str(example['label'])] + return self.seq2seq_format(src_texts, tgt_texts, add_prefix) + + +class MNLI(AbstractTask): + name = "mnli" + labels_list = ["0", "1", "2"] + split_to_data_split = {"train": "train", + "validation": "validation_mismatched", + "test": "validation_matched"} + metric = [metrics.accuracy] + metric_names = ["accuracy"] + + + def load_dataset(self, split): + return datasets.load_dataset('glue', 'mnli', split=split, script_version="master") + + def preprocessor(self, example, add_prefix=True): + src_texts = ["premise:", example['premise'], + "hypothesis", example["hypothesis"]] + tgt_texts = [str(example['label'])] + return self.seq2seq_format(src_texts, tgt_texts, add_prefix) + + +class QNLI(AbstractTask): + name = "qnli" + labels_list = ["0", "1"] + metric = [metrics.accuracy] + metric_names = ["accuracy"] + split_to_data_split = {"train": "train", + "validation": "validation", + "test": "validation"} + + def load_dataset(self, split): + return datasets.load_dataset('glue', 'qnli', split=split, script_version="master") + + def preprocessor(self, example, add_prefix=True): + src_texts = ["question:", example['question'], + "sentence:", example["sentence"]] + tgt_texts = [str(example['label'])] + return self.seq2seq_format(src_texts, tgt_texts, add_prefix) + +class RTE(AbstractTask): + name = "rte" + labels_list = ["0", "1"] + metric = [metrics.accuracy] + metric_names = ["accuracy"] + split_to_data_split = {"train": "train", + "validation": "validation", + "test": "validation"} + + def load_dataset(self, split): + return datasets.load_dataset('glue', 'rte', + split=split, script_version="master") + + def preprocessor(self, example, add_prefix=True): + src_texts = ["sentence1:", example['sentence1'], + "sentence2:", example["sentence2"]] + tgt_texts = [str(example['label'])] + return self.seq2seq_format(src_texts, tgt_texts, add_prefix) + + +class WNLI(AbstractTask): + name = "wnli" + labels_list = ["0", "1"] + metric = [metrics.accuracy] + metric_names = ["accuracy"] + split_to_data_split = {"train": "train", + "validation": "validation", + "test": "validation"} + + def load_dataset(self, split): + return datasets.load_dataset('glue', 'wnli', split=split, script_version="master") + + def preprocessor(self, example, add_prefix=True): + src_texts = ["sentence1:", example['sentence1'], + "sentence2:", example["sentence2"]] + tgt_texts = [str(example['label'])] + return self.seq2seq_format(src_texts, tgt_texts, add_prefix) + + +class SuperGLUEBoolQ(AbstractTask): + name="superglue-boolq" + labels_list = ['0', '1'] + metric = [metrics.accuracy] + metric_names = ["accuracy"] + split_to_data_split = {"train": "train", + "validation": "validation", + "test": "validation"} + + def load_dataset(self, split): + return datasets.load_dataset('super_glue', 'boolq', split=split, script_version="master") + + def preprocessor(self, example, add_prefix=True): + src_texts = ["question:", example["question"], "passage:", example["passage"]] + tgt_texts = [str(example["label"])] + return self.seq2seq_format(src_texts, tgt_texts, add_prefix) + + +class SuperGLUERTE(AbstractTask): + name="superglue-rte" + labels_list = ['0', '1'] + split_to_data_split = {"train": "train", + "validation": "validation", + "test": "validation"} + metric = [metrics.accuracy] + metric_names = ["accuracy"] + + def load_dataset(self, split): + return datasets.load_dataset('super_glue', 
'rte', split=split, script_version="master")
+
+ def preprocessor(self, example, add_prefix=True):
+ src_texts = ["premise:", example["premise"],
+ "hypothesis:", example["hypothesis"]]
+ tgt_texts = [str(example["label"])]
+ return self.seq2seq_format(src_texts, tgt_texts, add_prefix)
+
+
+class SuperGLUECB(AbstractTask):
+ name = "superglue-cb"
+ labels_list = ['0', '1', '2']
+ split_to_data_split = {"train": "train",
+ "validation": "validation",
+ "test": "validation"}
+ metric = [metrics.mean_multiclass_f1(num_classes=3), metrics.accuracy]
+ metric_names = ["f1_multiclass", "accuracy"]
+
+ def load_dataset(self, split):
+ return datasets.load_dataset('super_glue', 'cb', split=split, script_version="master")
+
+ def preprocessor(self, example, add_prefix=True):
+ src_texts = ["premise:", example["premise"], "hypothesis:", example["hypothesis"]]
+ tgt_texts = [str(example["label"])]
+ return self.seq2seq_format(src_texts, tgt_texts, add_prefix)
+
+
+class SuperGLUECOPA(AbstractTask):
+ name = "superglue-copa"
+ labels_list = ['0', '1']
+ split_to_data_split = {"train": "train",
+ "validation": "validation",
+ "test": "validation"}
+ metric = [metrics.accuracy]
+ metric_names = ["accuracy"]
+
+ def load_dataset(self, split):
+ return datasets.load_dataset('super_glue', 'copa', split=split, script_version="master")
+
+ def preprocessor(self, example, add_prefix=True):
+ src_texts = ["premise:", example["premise"],
+ "choice1:", example["choice1"],
+ "choice2:", example["choice2"]]
+ tgt_texts = [str(example["label"])]
+ return self.seq2seq_format(src_texts, tgt_texts, add_prefix)
+
+
+class SuperGLUEMultiRC(AbstractTask):
+ name = "superglue-multirc"
+ labels_list = ['0', '1']
+ split_to_data_split = {"train": "train",
+ "validation": "validation",
+ "test": "validation"}
+ metric = [metrics.multirc_f1_over_all_answers,
+ metrics.mean_group_metric(metrics.exact_match)]
+ metric_names = ["f1", "em"]
+
+ def load_dataset(self, split):
+ return datasets.load_dataset('super_glue', 'multirc', split=split, script_version="master")
+
+ def remove_markup(self, text):
+ """Removes the HTML markup."""
+ text = re.sub('<br>
', ' ', text) + text = re.sub('<(/)?b>', '', text) + return text + + def preprocessor(self, example, add_prefix=True): + group = example['idx']['question'] + # T5 applies remove_markup to the joined string, but this should not make + # any difference as well. + # https://github.com/google-research/text-to-text-transfer-transformer/blob/a1352e625db7ec114062f99d99b0565b9e45c155/t5/data/preprocessors.py#L797 + src_texts = ["question:", self.remove_markup(example["question"]), + "answer:", self.remove_markup(example["answer"]), + "paragraph:", self.remove_markup(example["paragraph"])] + tgt_texts = [str(example["label"])] + return self.seq2seq_format(src_texts, tgt_texts, add_prefix, extra_fields={"group": group}) + + + +class SuperGLUEWIC(AbstractTask): + name = "superglue-wic" + labels_list = ['0', '1'] + split_to_data_split = {"train": "train", + "validation": "validation", + "test": "validation"} + metric = [metrics.accuracy] + metric_names = ["accuracy"] + + def load_dataset(self, split): + return datasets.load_dataset('super_glue', 'wic', split=split, script_version="master") + + def preprocessor(self, example, add_prefix=True): + src_texts = ["sentence1:", example["sentence1"], + "sentence2:", example["sentence2"], + "word:", example["word"]] + tgt_texts = [str(example["label"])] + return self.seq2seq_format(src_texts, tgt_texts, add_prefix) + + +class SuperGLUEWSCFixed(AbstractTask): + # source: https://github.com/google-research/text-to-text-transfer-transformer/blob/master/t5/data/preprocessors.py + """Convert WSC examples to text2text format. + WSC includes a sentence along with 2 'spans': the first denoting a noun and + the other a pronoun. The 'label' specifies whether or not the pronoun is + referencing the noun. This preprocessor puts ' * ' around the noun and ' # ' + around the pronoun. + For example, a typical example from WSC might look like + { + 'text': 'This is a test sentence .', + 'span1_text': 'test', + 'span1_index': 3, + 'span2_text': 'This', + 'span2_index': 0, + 'label': 0 + } + This example would be transformed to + { + 'inputs': 'wsc text: # This # is a * test * sentence .', + 'targets': 'False' + } + """ + name = "superglue-wsc.fixed" + labels_list = ['0', '1'] + split_to_data_split = {"train": "train", + "validation": "validation", + "test": "validation"} + metric = [metrics.accuracy] + metric_names = ["accuracy"] + + def load_dataset(self, split): + return datasets.load_dataset('super_glue', 'wsc.fixed', split=split, script_version="master") + + def _mark_span(self, text, span_str, span_idx, mark): + pattern_tmpl = r'^((?:\S+\s){N})(W)' + pattern = re.sub('N', str(span_idx), pattern_tmpl) + pattern = re.sub('W', span_str, pattern) + return re.sub(pattern, r'\1{0} \2 {0}'.format(mark), text) + + def preprocessor(self, example, add_prefix=True): + # converts text as done in T5. + text = example['text'] + text = self._mark_span(text, example['span1_text'], example['span1_index'], '*') + # Compensate for 2 added "words" added in previous step. + span2_index = example['span2_index'] + 2 * int(example['span1_index'] < example['span2_index']) + text = self._mark_span(text, example['span2_text'], span2_index, '#') + src_texts = ["text:", text] + tgt_texts = [str(example["label"])] + return self.seq2seq_format(src_texts, tgt_texts, add_prefix) + + +class SuperGLUERecord(AbstractTask): + """Convert ReCoRD examples to text2text examples. 
+ ReCoRD contains a passage, a query containing a '@placeholder' string, and a set
+ of entities that are the possible values of the placeholder. Each train and
+ validation example will have a list of answers, any of which would be
+ considered correct.
+ For example, a typical example from ReCoRD might look like
+ {
+ 'passage': 'This is the passage.',
+ 'query': 'A @placeholder is a bird.',
+ 'entities': ['penguin', 'potato', 'pigeon'],
+ 'answers': ['penguin', 'pigeon'],
+ }
+ which this preprocessor would turn into the following two examples:
+ {
+ 'inputs': 'record query: A @placeholder is a bird. entities: penguin, '
+ 'potato, pigeon passage: This is the passage.',
+ 'targets': 'penguin',
+ }
+ and
+ {
+ 'inputs': 'record query: A @placeholder is a bird. entities: penguin, '
+ 'potato, pigeon passage: This is the passage.',
+ 'targets': 'pigeon',
+ }
+ """
+ name = "superglue-record"
+ split_to_data_split = {"train": "train",
+ "validation": "validation",
+ "test": "validation"}
+ metric = [metrics.squad]
+ metric_names = ["squad"]
+
+ def load_dataset(self, split):
+ return datasets.load_dataset('super_glue', 'record', split=split, script_version="master")
+
+ def preprocessor(self, batch, add_prefix=True):
+ new_batch = collections.defaultdict(list)
+ keys = batch.keys()
+ for values in zip(*batch.values()):
+ ex = {k: v for k, v in zip(keys, values)}
+ # updates the passage.
+ passage = ex['passage']
+ passage = re.sub(r'(\.|\?|\!|\"|\')\n@highlight\n', r'\1 ', passage)
+ passage = re.sub(r'\n@highlight\n', '. ', passage)
+ inputs = f"record query: {ex['query']} entities: {', '.join(ex['entities'])} passage: {passage}"
+ if add_prefix:
+ inputs = self.name + " " + inputs
+ # duplicates the samples based on number of answers.
+ num_answers = len(ex["answers"])
+ num_duplicates = np.maximum(1, num_answers)
+ new_batch["source"].extend([inputs] * num_duplicates)
+ new_batch["target"].extend(ex["answers"] if num_answers > 0 else [""])
+ new_batch["task"].extend([self.name] * num_duplicates)
+ new_batch["extra_fields"].extend([{"answers": ex["answers"]}]*num_duplicates)
+ return new_batch
+
+ def map_dataset(self, dataset, add_prefix=True):
+ return dataset.map(functools.partial(self.preprocessor, add_prefix=add_prefix),
+ batched=True, remove_columns=dataset.column_names)
+
+
+TASK_MAPPING = OrderedDict(
+ [
+ ('squad', Squad),
+ ('mrpc', MRPC),
+ ('cola', COLA),
+ ('sst2', SST2),
+ ('qnli', QNLI),
+ ('rte', RTE),
+ ('wnli', WNLI),
+ ('mnli', MNLI),
+ ('qqp', QQP),
+ ('stsb', STSB),
+ ('superglue-boolq', SuperGLUEBoolQ),
+ ('superglue-rte', SuperGLUERTE),
+ ('superglue-cb', SuperGLUECB),
+ ('superglue-copa', SuperGLUECOPA),
+ ('superglue-multirc', SuperGLUEMultiRC),
+ ('superglue-wic', SuperGLUEWIC),
+ ('superglue-wsc.fixed', SuperGLUEWSCFixed),
+ ('superglue-record', SuperGLUERecord)
+ ]
+)
+
+class AutoTask:
+ @classmethod
+ def get(self, task, config, seed=42):
+ if task in TASK_MAPPING:
+ return TASK_MAPPING[task](config, seed)
+ raise ValueError(
+ "Unrecognized task {} for AutoTask.\n"
+ "Task name should be one of {}.".format(
+ task, ", ".join(TASK_MAPPING.keys())
+ )
+ ) diff --git a/examples/examples_seq2seq/data_processors/utils.py b/examples/examples_seq2seq/data_processors/utils.py new file mode 100644 index 0000000..1445974 --- /dev/null +++ b/examples/examples_seq2seq/data_processors/utils.py @@ -0,0 +1,17 @@ +import numpy as np
+
+def round_stsb_target(label):
+ """STSB maps two sentences to a floating point number between 0 and 5
+ representing their
semantic similarity. Since we are treating all tasks as
+ text-to-text tasks, we need to convert this floating point number to a string.
+ The vast majority of the similarity score labels in STSB are in the set
+ [0, 0.2, 0.4, ..., 4.8, 5.0]. So, we first round the number to the closest
+ entry in this set, and then we convert the result to a string (literally e.g.
+ "3.4"). This converts STSB roughly into a 26-class classification dataset.
+ Args:
+ label: original label.
+ Returns:
+ A preprocessed label.
+ """
+ return np.round(label * 5) / 5
+ diff --git a/examples/examples_seq2seq/metrics/__init__.py b/examples/examples_seq2seq/metrics/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/examples/examples_seq2seq/metrics/metrics.py b/examples/examples_seq2seq/metrics/metrics.py new file mode 100644 index 0000000..65d4567 --- /dev/null +++ b/examples/examples_seq2seq/metrics/metrics.py @@ -0,0 +1,173 @@ +# several of the evaluation metrics are from https://github.com/google-research/text-to-text-transfer-transformer/blob/a1352e625db7ec114062f99d99b0565b9e45c155/t5/evaluation/metrics.py
+"""Defines different metrics used for evaluation of tasks."""
+import numpy as np
+import scipy.stats
+import math
+import sklearn
+import collections
+from logging import getLogger
+from .qa_utils import normalize_squad, qa_metrics
+import sklearn.metrics
+
+logger = getLogger(__name__)
+
+def accuracy(predictions, targets) -> dict:
+ """Computes the average accuracy."""
+ return {"accuracy": 100 * ((np.array(predictions) == np.array(targets)).mean())}
+
+def pearson_corrcoef(predictions, targets) -> dict:
+ """Computes Pearson correlation coefficient."""
+ from examples_seq2seq.data_processors.postprocessors import string_to_float
+ targets = [string_to_float(target) for target in targets]
+ predictions = [string_to_float(prediction) for prediction in predictions]
+ pearson_corrcoef = 100 * scipy.stats.pearsonr(targets, predictions)[0]
+
+ # Note that if all the predictions are the same, the pearson
+ # correlation is nan; to guard against this, we check the output
+ # and return 0 in this case.
+ if math.isnan(pearson_corrcoef):
+ pearson_corrcoef = 0
+ return {"pearson": pearson_corrcoef}
+
+
+def spearman_corrcoef(predictions, targets) -> dict:
+ """Computes Spearman correlation coefficient."""
+ # TODO: we need to do postprocessors in a clean way for each dataset.
+ from examples_seq2seq.data_processors.postprocessors import string_to_float
+ targets = [string_to_float(target) for target in targets]
+ predictions = [string_to_float(prediction) for prediction in predictions]
+ spearman_corrcoef = 100 * scipy.stats.spearmanr(targets, predictions)[0]
+
+ # Note that if all the predictions are the same, the spearman
+ # correlation is nan; to guard against this, we check the output
+ # and return 0 in this case.
+ if math.isnan(spearman_corrcoef):
+ spearman_corrcoef = 0
+ return {"spearmanr": spearman_corrcoef}
+
+
+def f1_score_with_invalid(predictions, targets) -> dict:
+ """Computes F1 score, where any prediction != 0 or 1 is counted as incorrect.
+ Args:
+ targets: list of targets, either 0 or 1
+ predictions: list of predictions, any integer value
+ Returns:
+ F1 score, where any prediction != 0 or 1 is counted as wrong.
+ """
+ def binary_reverse(labels):
+ return ['0' if label == '1' else '1' for label in labels]
+ targets, predictions = np.asarray(targets), np.asarray(predictions)
+ # Get indices of invalid predictions.
+ invalid_idx_mask = np.logical_and(predictions != '0', predictions != '1') + # For any prediction != 0 or 1, we set the prediction to the opposite of its corresponding target. + predictions[invalid_idx_mask] = binary_reverse(targets[invalid_idx_mask]) + targets = targets.astype(np.int32) + predictions = predictions.astype(np.int32) + return {"f1": 100 * sklearn.metrics.f1_score(targets, predictions)} + +# TODO: maybe gaurd against invalid values https://stackoverflow.com/questions/56865344/how-do-i-calculate-the-matthews-correlation-coefficient-in-tensorflow +def matthews_corrcoef(predictions, targets) -> dict: + """Computes the Matthews correlation coefficient.""" + return {"matthews_correlation": 100 * sklearn.metrics.matthews_corrcoef(targets, predictions)} + +def squad(predictions, targets): + """Computes SQuAD metrics, maximizing over answers per question. + Args: + targets: list of lists of strings + predictions: list of strings + Returns: + dict with score_key: squad score across all targets and predictions + """ + + targets = [[normalize_squad(t) for t in u] for u in targets] + predictions = [normalize_squad(p) for p in predictions] + return qa_metrics(targets, predictions) + + +def exact_match(predictions, targets): + """Computes whether the targets match predictions exactly.""" + return {"em": 100 * float(np.array_equal(targets, predictions))} + + +def sklearn_metrics_wrapper(metric_str, + metric_dict_str=None, + metric_post_process_fn=None, + **metric_fn_kwargs): + """Wraps any sklearn.metric function and returns a t5 metric function. + Args: + metric_str: string, the function from `sklearn.metrics` to use. + metric_dict_str: optional string, if not specified `metric_str` is used as + the key in the returned dictionary. + metric_post_process_fn: callable, if specified the final computed metric + will be passed through this. + **metric_fn_kwargs: kwargs, passed to the metric function we are calling. + Returns: + the function that calculates the metric in a dict. + """ + if not hasattr(sklearn.metrics, metric_str): + raise ValueError("sklearn.metrics does not have: %s" % metric_str) + + def fn(predictions, targets): + metric_fn = getattr(sklearn.metrics, metric_str) + metric_val = metric_fn(targets, predictions, **metric_fn_kwargs) + if metric_post_process_fn is not None: + metric_val = metric_post_process_fn(metric_val) + return {metric_dict_str or metric_str: metric_val} + return fn + + +def mean_multiclass_f1(num_classes, **metric_fn_kwargs): + """Computes the unweighted average of the F1 per class.""" + return sklearn_metrics_wrapper( + "fbeta_score", + metric_dict_str="f1_multiclass", + metric_post_process_fn=lambda x: 100 * x, + beta=1, + labels=range(num_classes), + average="macro", + **metric_fn_kwargs) + + +def multirc_f1_over_all_answers(targets, predictions): + """Special metric for MultiRC which computes F1 score over all examples. + This is necessary because the targets/predictions for MultiRC are dicts and + the f1_score_with_invalid expects a list of True/False labels, not dicts. As + a result we just need to key in the "value" for each of the example dicts + before feeding into f1_score_with_invalid. + Args: + targets: list of dicts, where each dict has a "value" key. + predictions: list of dicts, where each dict has a "value" key. + Returns: + F1 score over values, where any prediction != 0 or 1 is counted as wrong. 
+ """ + return f1_score_with_invalid( + [t["value"] for t in targets], [p["value"] for p in predictions] + ) + + +def mean_group_metric(metric_fn, group_key="group", value_key="value"): + """Returns a metric that averages `metric_fn` on sub-groups of results. + The sub-groups are defined by aggregating results (targets and predictions) + by accessing the feature specified by `group_key` in the target dicts. + **WARNING**: Using this function can produce unreliable results if you do not + pass in full groups. For example, if you evaluate over a random subsample of a + validation set and do not retain all of the examples in each group, you may + get results which aren't directly comparable to using the full validation set. + Args: + metric_fn: function, the metric to compute on the subgroups. + group_key: string, the key for the grouping value in the target dictionary. + value_key: string, the key for the value in the dictionaries. + """ + def my_metric(targets, predictions): + """Computes mean of `metric_fn` over subgroups of results.""" + grouped_values = collections.defaultdict(lambda: ([], [])) + for targ, pred in zip(targets, predictions): + g = targ[group_key] + grouped_values[g][0].append(targ[value_key]) + grouped_values[g][1].append(pred[value_key]) + group_scores = collections.defaultdict(list) + for (targets, predictions) in grouped_values.values(): + for metric, score in metric_fn(targets, predictions).items(): + group_scores[metric].append(score) + return {metric: np.mean(scores) for metric, scores in group_scores.items()} + return my_metric diff --git a/examples/examples_seq2seq/metrics/qa_utils.py b/examples/examples_seq2seq/metrics/qa_utils.py new file mode 100644 index 0000000..fe3fb0c --- /dev/null +++ b/examples/examples_seq2seq/metrics/qa_utils.py @@ -0,0 +1,96 @@ +# Copyright 2021 The T5 Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# source: the codes are from https://github.com/google-research/text-to-text-transfer-transformer +"""Utilities for Question Answering (QA) evaluation. +Matches results on the SQuAD (v1.1) and TriviaQA (v1.0) evaluation scripts. 
+""" + +import collections +import string +import regex as re +import numpy as np + + +def _normalize_answer(text, punc_chars, punc_repl): + """Lower text and remove punctuation, articles and extra whitespace.""" + + def remove_articles(s): + return re.sub(r"\b(a|an|the)\b", " ", s) + + def replace_punctuation(s): + to_replace = set(punc_chars) + return "".join(punc_repl if ch in to_replace else ch for ch in s) + + def white_space_fix(s): + return " ".join(s.split()) + + text = text.lower() + text = replace_punctuation(text) + text = remove_articles(text) + text = white_space_fix(text) + return text + + +def normalize_trivia_qa(answer): + """Normalization used in official TriviaQA evaluation script.""" + return _normalize_answer( + answer, punc_chars=string.punctuation + "‘’´`_", punc_repl=" ").strip() + + +def normalize_squad(answer): + """Normalization used in official SQuAD evaluation script.""" + return _normalize_answer(answer, punc_chars=string.punctuation, punc_repl="") + + +def _metric_max_over_ground_truths(metric_fn, ground_truths, prediction): + """Computes the maximum of the metric over all ground truths.""" + return max( + metric_fn(ground_truth, prediction) for ground_truth in ground_truths + ) + + +def _exact_match_score(target, prediction): + return target == prediction + + +def _f1_score(target, prediction): + """Computes token f1 score for a single target and prediction.""" + prediction_tokens = prediction.split() + target_tokens = target.split() + common = (collections.Counter(prediction_tokens) & + collections.Counter(target_tokens)) + num_same = sum(common.values()) + if num_same == 0: + return 0 + precision = 1.0 * num_same / len(prediction_tokens) + recall = 1.0 * num_same / len(target_tokens) + f1 = (2 * precision * recall) / (precision + recall) + return f1 + + +def qa_metrics(targets, predictions): + """Computes exact match and f1 QA scores, expecting pre-normalized text.""" + if len(targets) != len(predictions): + raise ValueError("Number of targets and predictions must match.") + em = np.mean([ + _metric_max_over_ground_truths(_exact_match_score, t, p) + for p, t in zip(predictions, targets) + ]) + f1 = np.mean([ + _metric_max_over_ground_truths(_f1_score, t, p) + for p, t in zip(predictions, targets) + ]) + em *= 100 + f1 *= 100 + return {"em": em, "f1": f1} diff --git a/examples/examples_seq2seq/run.sh b/examples/examples_seq2seq/run.sh new file mode 100644 index 0000000..fe2c981 --- /dev/null +++ b/examples/examples_seq2seq/run.sh @@ -0,0 +1,7 @@ +files=(cola mnli mrpc qnli qqp rte sst2 stsb superglue-boolq superglue-cb superglue-copa superglue-multirc superglue-record superglue-wic superglue-wsc.fixed) +for ((i=$1; i<=$2; i++)) +do + dataset=${files[i]} + echo "id$i:$dataset" + TOKENIZERS_PARALLELISM=false python run_seq2seq.py configs/$3/$dataset.json +done \ No newline at end of file diff --git a/examples/examples_seq2seq/run_seq2seq.py b/examples/examples_seq2seq/run_seq2seq.py new file mode 100644 index 0000000..90d6def --- /dev/null +++ b/examples/examples_seq2seq/run_seq2seq.py @@ -0,0 +1,468 @@ +# coding=utf-8 +# Copyright The HuggingFace Team and The HuggingFace Inc. team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +Fine-tuning the library models for sequence to sequence. +""" +# You can also adapt this script on your own sequence to sequence task. Pointers for this are left as comments. +import functools +import logging +from opendelta.utils.delta_hub import create_hub_repo_name +import torch +import os +os.environ['MKL_THREADING_LAYER'] = 'GNU' +os.environ['MKL_SERVICE_FORCE_INTEL'] = '1' +import sys +import subprocess +from typing import Optional, List + +from datasets import load_dataset, load_metric, concatenate_datasets +import transformers +from transformers import ( + AutoConfig, + AutoModelForSeq2SeqLM, + AutoTokenizer, + HfArgumentParser, + MBartTokenizer, + default_data_collator, + set_seed, +) +from transformers.trainer_utils import is_main_process, get_last_checkpoint +# from ..seq2seq.utils import get_adapter_config +from examples_seq2seq.data_processors import AutoTask, TaskDataCollatorForSeq2Seq, AutoPostProcessor +from examples_seq2seq.seq2seq_trainer import Seq2SeqTrainer +# from training_args import AdapterTrainingArguments +from examples_seq2seq.trainers.trainer_utils import save_training_config +from dataclasses import dataclass, field + +from transformers.models.t5.modeling_t5 import T5Config, T5ForConditionalGeneration +from examples_seq2seq.trainers.model_args import ModelArguments +from examples_seq2seq.trainers.trainer_args import TrainingArguments, DataTrainingArguments + +logger = logging.getLogger(__name__) + +def run_command(command): + output = subprocess.getoutput(command) + return output + + +TASK_TO_METRICS = {"mrpc": ["accuracy", "f1"], + "cola": ['matthews_correlation'], + "stsb": ['pearson', 'spearmanr'], + 'sst2': ['accuracy'], + "mnli": ["accuracy"], + "mnli_mismatched": ["accuracy"], + "mnli_matched": ["accuracy"], + "qnli": ["accuracy"], + "rte": ["accuracy"], + "wnli": ["accuracy"], + "qqp": ["accuracy", "f1"], + "superglue-boolq": ["accuracy"], + "superglue-rte": ["accuracy"], + "superglue-cb": ["f1_multiclass", "accuracy"], + "superglue-copa": ["accuracy"], + "superglue-multirc": ["f1", "em"], + "superglue-wic": ["accuracy"], + "superglue-wsc.fixed": ["accuracy"], + "superglue-record": ["f1", "em"] + } + + +class RemainArgHfArgumentParser(HfArgumentParser): + def parse_json_file(self, json_file: str, return_remaining_args=True ): + """ + Alternative helper method that does not use `argparse` at all, instead loading a json file and populating the + dataclass types. + """ + import argparse + import json + from pathlib import Path + import dataclasses + + data = json.loads(Path(json_file).read_text()) + outputs = [] + for dtype in self.dataclass_types: + keys = {f.name for f in dataclasses.fields(dtype) if f.init} + inputs = {k: data.pop(k) for k in list(data.keys()) if k in keys} + obj = dtype(**inputs) + outputs.append(obj) + + remain_args = argparse.ArgumentParser() + remain_args.__dict__.update(data) + if return_remaining_args: + return (*outputs, remain_args) + else: + return (*outputs,) + + +def main(): + + # See all possible arguments in src/transformers/training_args.py + # or by passing the --help flag to this script. 
+ # We now keep distinct sets of args, for a cleaner separation of concerns. + parser = RemainArgHfArgumentParser((ModelArguments, DataTrainingArguments, TrainingArguments)) + if len(sys.argv) == 2 and sys.argv[1].endswith(".json"): + # If we pass only one argument to the script and it's the path to a json file, + # let's parse it to get our arguments. + model_args, data_args, training_args, delta_args = parser.parse_json_file(json_file=os.path.abspath(sys.argv[1])) + else: + model_args, data_args, training_args, delta_args = parser.parse_args_into_dataclasses() + + + # Detecting last checkpoint. + last_checkpoint = None + if os.path.isdir(training_args.output_dir) and training_args.do_train and not training_args.overwrite_output_dir: + last_checkpoint = get_last_checkpoint(training_args.output_dir) + print("#### last_checkpoint ", last_checkpoint) + if last_checkpoint is None and len(os.listdir(training_args.output_dir)) > 0: + ''' + raise ValueError( + f"Output directory ({training_args.output_dir}) already exists and is not empty. " + "Use --overwrite_output_dir to overcome." + ) + ''' + pass + elif last_checkpoint is not None: + logger.info( + f"Checkpoint detected, resuming training at {last_checkpoint}. To avoid this behavior, change " + "the `--output_dir` or add `--overwrite_output_dir` to train from scratch." + ) + + # Setup logging + logging.basicConfig( + format="%(asctime)s - %(levelname)s - %(name)s - %(message)s", + datefmt="%m/%d/%Y %H:%M:%S", + handlers=[logging.StreamHandler(sys.stdout)], + ) + logger.setLevel(logging.INFO if is_main_process(training_args.local_rank) else logging.WARN) + + # Log on each process the small summary: + logger.warning( + f"Process rank: {training_args.local_rank}, device: {training_args.device}, n_gpu: {training_args.n_gpu}" + + f"distributed training: {bool(training_args.local_rank != -1)}, 16-bits training: {training_args.fp16}" + ) + # Set the verbosity to info of the Transformers logger (on main process only): + if is_main_process(training_args.local_rank): + transformers.utils.logging.set_verbosity_info() + logger.info("Training/evaluation parameters %s", training_args) + + # Set seed before initializing model. + set_seed(training_args.seed) + + # Get the datasets: you can either provide your own CSV/JSON training and evaluation files (see below) + # or just provide the name of one of the public datasets available on the hub at https://huggingface.co/datasets/ + # (the dataset will be downloaded automatically from the datasets Hub). + # + # For CSV/JSON files in the summarization task, this script will use the first column for the full texts and the + # second column for the summaries (unless you specify column names for this with the `text_column` and + # `summary_column` arguments). + # For translation, only JSON files are supported, with one field named "translation" containing two keys for the + # source and target languages (unless you adapt what follows). + # + # In distributed training, the load_dataset function guarantee that only one local process can concurrently + # download the dataset. + # See more about loading any type of standard or custom dataset (from files, python dict, pandas DataFrame, etc) at + # https://huggingface.co/docs/datasets/loading_datasets.html. + + # Load pretrained model and tokenizer + # + # Distributed training: + # The .from_pretrained methods guarantee that only one local process can concurrently + # download model & vocab. 
+ config = AutoConfig.from_pretrained( + model_args.config_name if model_args.config_name else model_args.model_name_or_path, + cache_dir=model_args.cache_dir, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + ) + config.dropout_rate = 0.0 + tokenizer = AutoTokenizer.from_pretrained( + model_args.tokenizer_name if model_args.tokenizer_name else model_args.model_name_or_path, + cache_dir=model_args.cache_dir, + use_fast=model_args.use_fast_tokenizer, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + ) + model = AutoModelForSeq2SeqLM.from_pretrained( + model_args.model_name_or_path, + from_tf=bool(".ckpt" in model_args.model_name_or_path), + config=config, + cache_dir=model_args.cache_dir, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + ) + model.resize_token_embeddings(len(tokenizer)) + + + if delta_args.delta_type.lower() != "none": + from opendelta import AutoDeltaConfig,AutoDeltaModel + delta_config = AutoDeltaConfig.from_dict(vars(delta_args)) + delta_model = AutoDeltaModel.from_config(delta_config, backbone_model=model) + delta_model.freeze_module(set_state_dict = True) + delta_model.log(delta_ratio=True, trainable_ratio=True, visualization=True) + + + # model parallelize + if hasattr(training_args, "model_parallel") and training_args.model_parallel: + logger.info('parallelize model!') + model.parallelize() + + data_args.dataset_name = [data_args.task_name] + data_args.eval_dataset_name = [data_args.eval_dataset_name] + data_args.test_dataset_name = [data_args.test_dataset_name] + data_args.dataset_config_name = [data_args.dataset_config_name] + data_args.eval_dataset_config_name = [data_args.eval_dataset_config_name] + data_args.test_dataset_config_name = [data_args.test_dataset_config_name] + assert len(data_args.dataset_name) == len(data_args.dataset_config_name) + if data_args.eval_dataset_name is not None: + assert len(data_args.eval_dataset_name) == len(data_args.eval_dataset_config_name) + if data_args.test_dataset_name is not None: + assert len(data_args.test_dataset_name) == len(data_args.test_dataset_config_name) + + # Temporarily set max_target_length for training. + #max_target_length = data_args.max_target_length + padding = "max_length" if data_args.pad_to_max_length else False + + def preprocess_function(examples, max_target_length): + # max_target_length += 1 + # model_inputs = tokenizer([s+"" for s in examples['source']], max_length=data_args.max_source_length, + # padding=padding, truncation=True) + # # Setup the tokenizer for targets + # with tokenizer.as_target_tokenizer(): + # labels = tokenizer([''+t for t in examples['target']], max_length=max_target_length, padding=padding, truncation=True) + model_inputs = tokenizer([s for s in examples['source']], max_length=data_args.max_source_length, + padding=padding, truncation=True) + # Setup the tokenizer for targets + with tokenizer.as_target_tokenizer(): + labels = tokenizer([t for t in examples['target']], max_length=max_target_length, padding=padding, truncation=True) + # If we are padding here, replace all tokenizer.pad_token_id in the labels by -100 when we want to ignore + # padding in the loss. 
+ if padding == "max_length" and data_args.ignore_pad_token_for_loss: + labels["input_ids"] = [ + [(l if l != tokenizer.pad_token_id else -100) for l in label] for label in labels["input_ids"] + ] + model_inputs["labels"] = labels["input_ids"] + model_inputs["extra_fields"] = examples['extra_fields'] + return model_inputs + + column_names = ['source', 'target', 'extra_fields'] + performance_metrics = {} + if training_args.do_train: + train_datasets = [AutoTask.get(dataset_name, + dataset_config_name, + seed=data_args.data_seed).get( + split="train", + split_validation_test=training_args.split_validation_test, + add_prefix=True, + n_obs=data_args.max_train_samples) + for dataset_name, dataset_config_name\ + in zip(data_args.dataset_name, data_args.dataset_config_name)] + max_target_lengths = [AutoTask.get(dataset_name, dataset_config_name).get_max_target_length(\ + tokenizer=tokenizer, default_max_length=data_args.max_target_length)\ + for dataset_name, dataset_config_name in zip(data_args.dataset_name, data_args.dataset_config_name)] + for i, train_dataset in enumerate(train_datasets): + train_datasets[i] = train_datasets[i].map( + functools.partial(preprocess_function, max_target_length=max_target_lengths[i]), + batched=True, + num_proc=data_args.preprocessing_num_workers, + remove_columns=column_names, # if train_dataset != "superglue-record" else column_names+["answers"], + load_from_cache_file=not data_args.overwrite_cache, + ) + train_dataset = concatenate_datasets(train_datasets) + + if training_args.do_eval: + eval_datasets = {eval_dataset: AutoTask.get(eval_dataset, eval_dataset_config, + seed=data_args.data_seed).get( + split="validation", + split_validation_test=training_args.split_validation_test, + add_prefix=True, + n_obs=data_args.max_val_samples) + for eval_dataset, eval_dataset_config in zip(data_args.eval_dataset_name, data_args.eval_dataset_config_name)} + max_target_lengths = [AutoTask.get(dataset_name, dataset_config_name).get_max_target_length( \ + tokenizer=tokenizer, default_max_length=data_args.max_target_length) \ + for dataset_name, dataset_config_name in zip(data_args.eval_dataset_name, data_args.eval_dataset_config_name)] + for k, name in enumerate(eval_datasets): + eval_datasets[name] = eval_datasets[name].map( + functools.partial(preprocess_function, max_target_length=max_target_lengths[k]), + batched=True, + num_proc=data_args.preprocessing_num_workers, + remove_columns=column_names, # if name != "superglue-record" else column_names+["answers"], + load_from_cache_file=not data_args.overwrite_cache, + ) + + if training_args.do_test: + test_datasets = {test_dataset: AutoTask.get(test_dataset, test_dataset_config, + seed=data_args.data_seed).get( + split="test", + split_validation_test=training_args.split_validation_test, + add_prefix=True, + n_obs=data_args.max_test_samples) + for test_dataset, test_dataset_config in zip(data_args.test_dataset_name, data_args.test_dataset_config_name)} + max_target_lengths = [AutoTask.get(dataset_name, dataset_config_name).get_max_target_length( \ + tokenizer=tokenizer, default_max_length=data_args.max_target_length) \ + for dataset_name, dataset_config_name in zip(data_args.test_dataset_name, data_args.test_dataset_config_name)] + for k, name in enumerate(test_datasets): + test_datasets[name] = test_datasets[name].map( + functools.partial(preprocess_function, max_target_length=max_target_lengths[k]), + batched=True, + num_proc=data_args.preprocessing_num_workers, + remove_columns=column_names, + load_from_cache_file=not 
data_args.overwrite_cache, + ) + + # Data collator + label_pad_token_id = -100 if data_args.ignore_pad_token_for_loss else tokenizer.pad_token_id + if data_args.pad_to_max_length: + data_collator = default_data_collator + else: + data_collator = TaskDataCollatorForSeq2Seq( + tokenizer, + label_pad_token_id=label_pad_token_id, + pad_to_multiple_of=8 if training_args.fp16 else None, + ) + + + # Metric, we assume we have only one training task. + eval_metrics = [AutoTask.get(dataset_name, dataset_config_name).metric\ + for dataset_name, dataset_config_name in zip(data_args.dataset_name, data_args.dataset_config_name)][0] + + # Extracts the extra information needed to evaluate on each dataset. + # These information are only used in the compute_metrics. + # We will assume that the test/eval dataloader does not change the order of + # the data. + data_info = {"eval": eval_datasets[data_args.eval_dataset_name[0]]['extra_fields'], + "test": test_datasets[data_args.test_dataset_name[0]]['extra_fields'], + "train": train_dataset['extra_fields']} + def compute_metrics(eval_preds): + preds, labels, data_info = eval_preds + post_processor = AutoPostProcessor.get(data_args.dataset_name[0], tokenizer, + data_args.ignore_pad_token_for_loss) + decoded_preds, decoded_labels = post_processor.process(preds, labels, data_info) + result = {} + for metric in eval_metrics: + result.update(metric(decoded_preds, decoded_labels)) + return result + + + # Initialize our Trainer + trainer = Seq2SeqTrainer( + model=model, + args=training_args, + delta_args=delta_args, + train_dataset=train_dataset if training_args.do_train else None, + eval_dataset=list(eval_datasets.values())[0] if training_args.do_eval else None, + data_info = data_info, + tokenizer=tokenizer, + data_collator=data_collator, + compute_metrics=compute_metrics if training_args.predict_with_generate else None, + evaluation_metrics = TASK_TO_METRICS[data_args.dataset_name[0]], + ) + + + # Saves training config. 
+ if trainer.is_world_process_zero(): + os.makedirs(training_args.output_dir, exist_ok=True) + save_training_config(sys.argv[1], training_args.output_dir) + + # Training + if training_args.do_train: + checkpoint = None + if training_args.resume_from_checkpoint is not None: + checkpoint = training_args.resume_from_checkpoint + elif last_checkpoint is not None: + checkpoint = last_checkpoint + + if training_args.compute_time: + torch.cuda.synchronize() # wait for move to complete + start = torch.cuda.Event(enable_timing=True) + end = torch.cuda.Event(enable_timing=True) + start.record() + + train_result = trainer.train(resume_from_checkpoint=checkpoint) + + if training_args.compute_time: + end.record() + torch.cuda.synchronize() # wait for all_reduce to complete + total_time = start.elapsed_time(end)/(1000*60) + performance_metrics.update({"total_time in minutes ": total_time}) + + trainer.save_model() # Saves the tokenizer too for easy upload + train_metrics = train_result.metrics + max_train_samples = ( + data_args.max_train_samples if data_args.max_train_samples is not None else len(train_dataset) + ) + train_metrics["train_samples"] = min(max_train_samples, len(train_dataset)) + trainer.log_metrics("train", train_metrics) + trainer.save_metrics("train", train_metrics) + trainer.save_state() + + if torch.cuda.is_available() and training_args.compute_memory: + peak_memory = (torch.cuda.max_memory_allocated() / 1024 ** 2)/1000 + print( + "Memory utilization", + peak_memory, + "GB" + ) + performance_metrics.update({"peak_memory": peak_memory}) + if training_args.compute_memory or training_args.compute_time: + print(performance_metrics) + trainer.save_metrics("performance", performance_metrics) + + # Evaluation + results = {} + if training_args.do_eval: + logger.info("*** Evaluate ***") + for task, eval_dataset in eval_datasets.items(): + metrics = trainer.evaluate(eval_dataset=eval_dataset, + max_length=data_args.val_max_target_length, num_beams=data_args.num_beams, + ) + trainer.log_metrics("eval", metrics) + trainer.save_metrics("eval", metrics) + results['evaluate'] = metrics + + # Test + if training_args.do_test: + logger.info("*** Test ***") + for task, test_dataset in test_datasets.items(): + metrics = trainer.evaluate(eval_dataset=test_dataset, + max_length=data_args.test_max_target_length, num_beams=data_args.num_beams, + metric_key_prefix="test" + ) + trainer.log_metrics("test", metrics) + trainer.save_metrics("test", metrics) + results['test'] = metrics + + repo_name = create_hub_repo_name(root="DeltaHub", + dataset=data_args.task_name, + delta_type = delta_args.delta_type, + model_name_or_path= model_args.model_name_or_path) + results['repo_name'] = repo_name + if training_args.push_to_hub: # TODO add description here + delta_model.save_finetuned(push_to_hub=True, save_directory=repo_name, use_auth_token=True) + # trainer.push_to_hub(**kwargs) + else: + delta_model.save_finetuned(push_to_hub=False, save_directory=repo_name, use_auth_token=True) + + return results + + + + +if __name__ == "__main__": + result = main() + import json + with open("collect_result.jsonl", 'a') as fout: + string = json.dumps(result, indent=4,sort_keys=True) + fout.write(string+"\n") + print(result) diff --git a/examples/examples_seq2seq/seq2seq_trainer.py b/examples/examples_seq2seq/seq2seq_trainer.py new file mode 100644 index 0000000..8f31e54 --- /dev/null +++ b/examples/examples_seq2seq/seq2seq_trainer.py @@ -0,0 +1,127 @@ +from packaging import version +import torch +from torch import nn +from typing 
import Any, Dict, List, Optional, Tuple, Union + +from torch.utils.data.dataset import Dataset +from transformers import Seq2SeqTrainer as HfSeq2SeqTrainner +from examples_seq2seq.trainers.trainer import BaseTrainer + + # if is_sagemaker_mp_enabled(): +# import smdistributed.modelparallel.torch as smp + +# from transformers.trainer_utils import ShardedDDPOption + +# if is_fairscale_available(): +# dep_version_check("fairscale") +# import fairscale +# from fairscale.nn.data_parallel import FullyShardedDataParallel as FullyShardedDDP +# from fairscale.nn.data_parallel import ShardedDataParallel as ShardedDDP +# from fairscale.nn.wrap import auto_wrap +# from fairscale.optim import OSS +# from fairscale.optim.grad_scaler import ShardedGradScaler + +from transformers.optimization import Adafactor, AdamW, get_scheduler +from transformers.trainer_pt_utils import get_parameter_names, is_sagemaker_mp_enabled +from transformers.integrations import is_fairscale_available + + + +if version.parse(torch.__version__) >= version.parse("1.6"): + from torch.cuda.amp import autocast + + +class Seq2SeqTrainer(HfSeq2SeqTrainner, BaseTrainer): + def __init__(self, train_dataset_sizes=None, delta_args=None, *args, **kwargs): + super().__init__(*args, **kwargs) + self.train_dataset_sizes = train_dataset_sizes + self.delta_args = delta_args + + def evaluate( + self, + eval_dataset: Optional[Dict[str, Dataset]] = None, + ignore_keys: Optional[List[str]] = None, + metric_key_prefix: str = "eval", + max_length: Optional[int] = None, + num_beams: Optional[int] = None, + ) -> Dict[str, float]: + # TODO: this also needs to be set per dataset + self._max_length = max_length + self._num_beams = num_beams + return super().evaluate(eval_dataset, ignore_keys=ignore_keys, metric_key_prefix=metric_key_prefix) + + + def prediction_step( + self, + model: nn.Module, + inputs: Dict[str, Union[torch.Tensor, Any]], + prediction_loss_only: bool, + ignore_keys: Optional[List[str]] = None, + ) -> Tuple[Optional[float], Optional[torch.Tensor], Optional[torch.Tensor]]: + """ + Perform an evaluation step on :obj:`model` using obj:`inputs`. + + Subclass and override to inject custom behavior. + + Args: + model (:obj:`nn.Module`): + The model to evaluate. + inputs (:obj:`Dict[str, Union[torch.Tensor, Any]]`): + The inputs and targets of the model. + + The dictionary will be unpacked before being fed to the model. Most models expect the targets under the + argument :obj:`labels`. Check your model's documentation for all accepted arguments. + prediction_loss_only (:obj:`bool`): + Whether or not to return the loss only. + + Return: + Tuple[Optional[float], Optional[torch.Tensor], Optional[torch.Tensor]]: A tuple with the loss, logits and + labels (each being optional). 
+ """ + if not self.args.predict_with_generate or prediction_loss_only: + return super().prediction_step( + model, inputs, prediction_loss_only=prediction_loss_only, ignore_keys=ignore_keys + ) + + has_labels = "labels" in inputs + inputs = self._prepare_inputs(inputs) + gen_kwargs = { + "max_length": self._max_length if self._max_length is not None else self.model.config.max_length, + "num_beams": self._num_beams if self._num_beams is not None else self.model.config.num_beams, + } + generated_tokens = self.model.generate( + inputs["input_ids"], + attention_mask=inputs["attention_mask"], + **gen_kwargs, + ) + # in case the batch is shorter than max length, the output should be padded + if generated_tokens.shape[-1] < gen_kwargs["max_length"]: + generated_tokens = self._pad_tensors_to_max_len(generated_tokens, gen_kwargs["max_length"]) + + with torch.no_grad(): + if self.use_amp: + with autocast(): + outputs = model(**inputs) + else: + outputs = model(**inputs) + if has_labels: + if self.label_smoother is not None: + loss = self.label_smoother(outputs, inputs["labels"]).mean().detach() + else: + loss = (outputs["loss"] if isinstance(outputs, dict) else outputs[0]).mean().detach() + else: + loss = None + + if self.args.prediction_loss_only: + return (loss, None, None) + + labels = inputs["labels"] + if labels.shape[-1] < gen_kwargs["max_length"]: + labels = self._pad_tensors_to_max_len(labels, gen_kwargs["max_length"]) + + return (loss, generated_tokens, labels) + + + + + diff --git a/examples/examples_seq2seq/trainers/__init__.py b/examples/examples_seq2seq/trainers/__init__.py new file mode 100644 index 0000000..8a0a403 --- /dev/null +++ b/examples/examples_seq2seq/trainers/__init__.py @@ -0,0 +1,2 @@ +from .trainer import BaseTrainer +from .seq2seq_trainer import Seq2SeqTrainer diff --git a/examples/examples_seq2seq/trainers/model_args.py b/examples/examples_seq2seq/trainers/model_args.py new file mode 100644 index 0000000..35e7785 --- /dev/null +++ b/examples/examples_seq2seq/trainers/model_args.py @@ -0,0 +1,36 @@ +from dataclasses import dataclass, field +from typing import Optional, List + +@dataclass +class ModelArguments: + """ + Arguments pertaining to which model/config/tokenizer we are going to fine-tune from. + """ + model_name_or_path: str = field( + metadata={"help": "Path to pretrained model or model identifier from huggingface.co/models"} + ) + config_name: Optional[str] = field( + default=None, metadata={"help": "Pretrained config name or path if not the same as model_name"} + ) + tokenizer_name: Optional[str] = field( + default=None, metadata={"help": "Pretrained tokenizer name or path if not the same as model_name"} + ) + cache_dir: Optional[str] = field( + default=None, + metadata={"help": "Where to store the pretrained models downloaded from huggingface.co"}, + ) + use_fast_tokenizer: bool = field( + default=True, + metadata={"help": "Whether to use one of the fast tokenizer (backed by the tokenizers library) or not."}, + ) + model_revision: str = field( + default="main", + metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."}, + ) + use_auth_token: bool = field( + default=False, + metadata={ + "help": "Will use the token generated when running `transformers-cli login` (necessary to use this script " + "with private models)." 
+ }, + ) \ No newline at end of file diff --git a/examples/examples_seq2seq/trainers/seq2seq_trainer.py b/examples/examples_seq2seq/trainers/seq2seq_trainer.py new file mode 100644 index 0000000..d6a2b80 --- /dev/null +++ b/examples/examples_seq2seq/trainers/seq2seq_trainer.py @@ -0,0 +1,108 @@ +from packaging import version +import torch +from torch import nn +from typing import Any, Dict, List, Optional, Tuple, Union + +from torch.utils.data.dataset import Dataset +from transformers import Seq2SeqTrainer +from .trainer import BaseTrainer + + +if version.parse(torch.__version__) >= version.parse("1.6"): + from torch.cuda.amp import autocast + + +class Seq2SeqTrainer(Seq2SeqTrainer, BaseTrainer): + def __init__(self, train_dataset_sizes=None, delta_args=None, *args, **kwargs): + super().__init__(*args, **kwargs) + self.train_dataset_sizes = train_dataset_sizes + self.delta_args = delta_args + + def evaluate( + self, + eval_dataset: Optional[Dict[str, Dataset]] = None, + ignore_keys: Optional[List[str]] = None, + metric_key_prefix: str = "eval", + max_length: Optional[int] = None, + num_beams: Optional[int] = None, + ) -> Dict[str, float]: + # TODO: this also needs to be set per dataset + self._max_length = max_length + self._num_beams = num_beams + return super().evaluate(eval_dataset, ignore_keys=ignore_keys, metric_key_prefix=metric_key_prefix) + + + def prediction_step( + self, + model: nn.Module, + inputs: Dict[str, Union[torch.Tensor, Any]], + prediction_loss_only: bool, + ignore_keys: Optional[List[str]] = None, + ) -> Tuple[Optional[float], Optional[torch.Tensor], Optional[torch.Tensor]]: + """ + Perform an evaluation step on :obj:`model` using obj:`inputs`. + + Subclass and override to inject custom behavior. + + Args: + model (:obj:`nn.Module`): + The model to evaluate. + inputs (:obj:`Dict[str, Union[torch.Tensor, Any]]`): + The inputs and targets of the model. + + The dictionary will be unpacked before being fed to the model. Most models expect the targets under the + argument :obj:`labels`. Check your model's documentation for all accepted arguments. + prediction_loss_only (:obj:`bool`): + Whether or not to return the loss only. + + Return: + Tuple[Optional[float], Optional[torch.Tensor], Optional[torch.Tensor]]: A tuple with the loss, logits and + labels (each being optional). 
+ """ + if not self.args.predict_with_generate or prediction_loss_only: + return super().prediction_step( + model, inputs, prediction_loss_only=prediction_loss_only, ignore_keys=ignore_keys + ) + + has_labels = "labels" in inputs + inputs = self._prepare_inputs(inputs) + gen_kwargs = { + "max_length": self._max_length if self._max_length is not None else self.model.config.max_length, + "num_beams": self._num_beams if self._num_beams is not None else self.model.config.num_beams, + } + generated_tokens = self.model.generate( + inputs["input_ids"], + attention_mask=inputs["attention_mask"], + **gen_kwargs, + ) + # in case the batch is shorter than max length, the output should be padded + if generated_tokens.shape[-1] < gen_kwargs["max_length"]: + generated_tokens = self._pad_tensors_to_max_len(generated_tokens, gen_kwargs["max_length"]) + + with torch.no_grad(): + if self.use_amp: + with autocast(): + outputs = model(**inputs) + else: + outputs = model(**inputs) + if has_labels: + if self.label_smoother is not None: + loss = self.label_smoother(outputs, inputs["labels"]).mean().detach() + else: + loss = (outputs["loss"] if isinstance(outputs, dict) else outputs[0]).mean().detach() + else: + loss = None + + if self.args.prediction_loss_only: + return (loss, None, None) + + labels = inputs["labels"] + if labels.shape[-1] < gen_kwargs["max_length"]: + labels = self._pad_tensors_to_max_len(labels, gen_kwargs["max_length"]) + + return (loss, generated_tokens, labels) + + + + + diff --git a/examples/examples_seq2seq/trainers/trainer.py b/examples/examples_seq2seq/trainers/trainer.py new file mode 100644 index 0000000..304e32b --- /dev/null +++ b/examples/examples_seq2seq/trainers/trainer.py @@ -0,0 +1,274 @@ +from typing import Dict, List, Optional +import numpy as np +import time +import torch +import collections +from packaging import version +from torch.utils.data.dataset import Dataset + +from transformers import Trainer +from transformers import logging +from transformers.trainer_utils import ( + speed_metrics, + EvalLoopOutput, + denumpify_detensorize +) +from transformers.file_utils import is_torch_tpu_available +from transformers.trainer_pt_utils import ( + find_batch_size, + nested_numpify, + nested_truncate, + nested_concat, + IterableDatasetShard +) +from .trainer_utils import EvalPrediction + + +from torch.utils.data.dataloader import DataLoader +from torch.utils.data.dataset import IterableDataset +from transformers.deepspeed import deepspeed_init + + +if version.parse(torch.__version__) >= version.parse("1.6"): + from torch.cuda.amp import autocast + +if is_torch_tpu_available(): + import torch_xla.core.xla_model as xm + import torch_xla.debug.metrics as met + import torch_xla.distributed.parallel_loader as pl + +logger = logging.get_logger(__name__) + +class BaseTrainer(Trainer): + def __init__(self, evaluation_metrics=[], data_info=None, *args, **kwargs): + """When doing evaluation, it computes average of list of metrics + given in evaluation_metrics and adds it to the dictionary of results. 
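+        The average is logged as ``{metric_key_prefix}_average_metrics``.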
+ Trainer class then use this average metric to save the best model.""" + super().__init__(*args, **kwargs) + self.evaluation_metrics = evaluation_metrics + self.data_info = data_info + + def get_data_info(self, metric_key_prefix): + """Returns the data information required to make the predictions/labels + suitable for the evaluation.""" + if self.data_info is not None: + return self.data_info[metric_key_prefix] + return None + + def evaluate( + self, + eval_dataset: Optional[Dataset] = None, + ignore_keys: Optional[List[str]] = None, + metric_key_prefix: str = "eval", + ) -> Dict[str, float]: + """ + Run evaluation and returns metrics. + The calling script will be responsible for providing a method to compute metrics, as they are task-dependent + (pass it to the init :obj:`compute_metrics` argument). + You can also subclass and override this method to inject custom behavior. + Args: + eval_dataset (:obj:`Dataset`, `optional`): + Pass a dataset if you wish to override :obj:`self.eval_dataset`. If it is an :obj:`datasets.Dataset`, + columns not accepted by the ``model.forward()`` method are automatically removed. It must implement the + :obj:`__len__` method. + ignore_keys (:obj:`Lst[str]`, `optional`): + A list of keys in the output of your model (if it is a dictionary) that should be ignored when + gathering predictions. + metric_key_prefix (:obj:`str`, `optional`, defaults to :obj:`"eval"`): + An optional prefix to be used as the metrics key prefix. For example the metrics "bleu" will be named + "eval_bleu" if the prefix is "eval" (default) + Returns: + A dictionary containing the evaluation loss and the potential metrics computed from the predictions. The + dictionary also contains the epoch number which comes from the training state. + """ + # memory metrics - must set up as early as possible + self._memory_tracker.start() + eval_dataloader = self.get_eval_dataloader(eval_dataset) + start_time = time.time() + eval_loop = self.prediction_loop if self.args.use_legacy_prediction_loop else self.evaluation_loop + output = eval_loop( + eval_dataloader, + description="Evaluation", + # No point gathering the predictions if there are no metrics, otherwise we defer to + # self.args.prediction_loss_only + prediction_loss_only=True if self.compute_metrics is None else None, + ignore_keys=ignore_keys, + metric_key_prefix=metric_key_prefix, + ) + output.metrics.update(speed_metrics(metric_key_prefix, start_time, output.num_samples)) + if len(self.evaluation_metrics) != 0: + selected_metrics = [output.metrics[metric_key_prefix+"_"+k] for k in self.evaluation_metrics if metric_key_prefix+"_"+k in output.metrics] + assert len(selected_metrics) >= 1, "at least one metric should be selected to compute the average_metrics." + output.metrics.update({metric_key_prefix+'_average_metrics': np.mean(selected_metrics)}) + + self.log(output.metrics) + + if self.args.tpu_metrics_debug or self.args.debug: + # tpu-comment: Logging debug metrics for PyTorch/XLA (compile, execute times, ops, etc.) 
+ xm.master_print(met.metrics_report()) + + self.control = self.callback_handler.on_evaluate(self.args, self.state, self.control, output.metrics) + self._memory_tracker.stop_and_update_metrics(output.metrics) + return output.metrics + + def evaluation_loop( + self, + dataloader: DataLoader, + description: str, + prediction_loss_only: Optional[bool] = None, + ignore_keys: Optional[List[str]] = None, + metric_key_prefix: str = "eval", + ) -> EvalLoopOutput: + """ + Prediction/evaluation loop, shared by :obj:`Trainer.evaluate()` and :obj:`Trainer.predict()`. + + Works both with or without labels. + """ + prediction_loss_only = ( + prediction_loss_only if prediction_loss_only is not None else self.args.prediction_loss_only + ) + + # if eval is called w/o train init deepspeed here + if self.args.deepspeed and not self.deepspeed: + + # XXX: eval doesn't have `resume_from_checkpoint` arg but we should be able to do eval + # from the checkpoint eventually + deepspeed_engine, _, _ = deepspeed_init(self, num_training_steps=0, resume_from_checkpoint=None) + self.model = deepspeed_engine.module + self.model_wrapped = deepspeed_engine + self.deepspeed = deepspeed_engine + # XXX: we don't need optim/sched for inference, but this needs to be sorted out, since + # for example the Z3-optimizer is a must for zero3 to work even for inference - what we + # don't need is the deepspeed basic optimizer which is self.optimizer.optimizer + deepspeed_engine.optimizer.optimizer = None + deepspeed_engine.lr_scheduler = None + + model = self._wrap_model(self.model, training=False) + + # if full fp16 is wanted on eval and this ``evaluation`` or ``predict`` isn't called while + # ``train`` is running, halve it first and then put on device + if not self.is_in_train and self.args.fp16_full_eval: + model = model.half().to(self.args.device) + + batch_size = dataloader.batch_size + + logger.info(f"***** Running {description} *****") + if isinstance(dataloader.dataset, collections.abc.Sized): + logger.info(f" Num examples = {self.num_examples(dataloader)}") + else: + logger.info(" Num examples: Unknown") + logger.info(f" Batch size = {batch_size}") + + model.eval() + + self.callback_handler.eval_dataloader = dataloader + # Do this before wrapping. + eval_dataset = dataloader.dataset + + if is_torch_tpu_available(): + dataloader = pl.ParallelLoader(dataloader, [self.args.device]).per_device_loader(self.args.device) + + if self.args.past_index >= 0: + self._past = None + + # Initialize containers + # losses/preds/labels on GPU/TPU (accumulated for eval_accumulation_steps) + losses_host = None + preds_host = None + labels_host = None + # losses/preds/labels on CPU (final containers) + all_losses = None + all_preds = None + all_labels = None + # Will be useful when we have an iterable dataset so don't know its length. 
+ + observed_num_examples = 0 + # Main evaluation loop + for step, inputs in enumerate(dataloader): + # Update the observed num examples + observed_batch_size = find_batch_size(inputs) + if observed_batch_size is not None: + observed_num_examples += observed_batch_size + + # Prediction step + loss, logits, labels = self.prediction_step(model, inputs, prediction_loss_only, ignore_keys=ignore_keys) + # Update containers on host + if loss is not None: + losses = self._nested_gather(loss.repeat(batch_size)) + losses_host = losses if losses_host is None else torch.cat((losses_host, losses), dim=0) + if logits is not None: + logits = self._pad_across_processes(logits) + logits = self._nested_gather(logits) + preds_host = logits if preds_host is None else nested_concat(preds_host, logits, padding_index=-100) + if labels is not None: + labels = self._pad_across_processes(labels) + labels = self._nested_gather(labels) + labels_host = labels if labels_host is None else nested_concat(labels_host, labels, padding_index=-100) + self.control = self.callback_handler.on_prediction_step(self.args, self.state, self.control) + + # Gather all tensors and put them back on the CPU if we have done enough accumulation steps. + if self.args.eval_accumulation_steps is not None and (step + 1) % self.args.eval_accumulation_steps == 0: + if losses_host is not None: + losses = nested_numpify(losses_host) + all_losses = losses if all_losses is None else np.concatenate((all_losses, losses), axis=0) + if preds_host is not None: + logits = nested_numpify(preds_host) + all_preds = logits if all_preds is None else nested_concat(all_preds, logits, padding_index=-100) + if labels_host is not None: + labels = nested_numpify(labels_host) + all_labels = ( + labels if all_labels is None else nested_concat(all_labels, labels, padding_index=-100) + ) + + # Set back to None to begin a new accumulation + losses_host, preds_host, labels_host = None, None, None + + if self.args.past_index and hasattr(self, "_past"): + # Clean the state at the end of the evaluation loop + delattr(self, "_past") + + # Gather all remaining tensors and put them back on the CPU + if losses_host is not None: + losses = nested_numpify(losses_host) + all_losses = losses if all_losses is None else np.concatenate((all_losses, losses), axis=0) + if preds_host is not None: + logits = nested_numpify(preds_host) + all_preds = logits if all_preds is None else nested_concat(all_preds, logits, padding_index=-100) + if labels_host is not None: + labels = nested_numpify(labels_host) + all_labels = labels if all_labels is None else nested_concat(all_labels, labels, padding_index=-100) + + # Number of samples + if not isinstance(eval_dataset, IterableDataset): + num_samples = len(eval_dataset) + elif isinstance(eval_dataset, IterableDatasetShard): + num_samples = eval_dataset.num_examples + else: + num_samples = observed_num_examples + + # Number of losses has been rounded to a multiple of batch_size and in a distributed training, the number of + # samplers has been rounded to a multiple of batch_size, so we truncate. + if all_losses is not None: + all_losses = all_losses[:num_samples] + if all_preds is not None: + all_preds = nested_truncate(all_preds, num_samples) + if all_labels is not None: + all_labels = nested_truncate(all_labels, num_samples) + # Metrics! 
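+        # Unlike the stock HuggingFace evaluation loop, compute_metrics here receives an EvalPrediction
+        # that also carries the split-specific data_info (see get_data_info above), so task-dependent
+        # post-processing of predictions/labels can happen inside the metric function.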
+ if self.compute_metrics is not None and all_preds is not None and all_labels is not None: + metrics = self.compute_metrics(EvalPrediction(predictions=all_preds, label_ids=all_labels, + data_info=self.get_data_info(metric_key_prefix))) + else: + metrics = {} + + # To be JSON-serializable, we need to remove numpy types or zero-d tensors + metrics = denumpify_detensorize(metrics) + + if all_losses is not None: + metrics[f"{metric_key_prefix}_loss"] = all_losses.mean().item() + + # Prefix all keys with metric_key_prefix + '_' + for key in list(metrics.keys()): + if not key.startswith(f"{metric_key_prefix}_"): + metrics[f"{metric_key_prefix}_{key}"] = metrics.pop(key) + return EvalLoopOutput(predictions=all_preds, label_ids=all_labels, metrics=metrics, num_samples=num_samples) diff --git a/examples/examples_seq2seq/trainers/trainer_args.py b/examples/examples_seq2seq/trainers/trainer_args.py new file mode 100644 index 0000000..4f30e79 --- /dev/null +++ b/examples/examples_seq2seq/trainers/trainer_args.py @@ -0,0 +1,140 @@ +from dataclasses import dataclass, field +from typing import Optional, List +from transformers import Seq2SeqTrainingArguments +# run_seq2seq parameters. + +@dataclass +class TrainingArguments(Seq2SeqTrainingArguments): + print_num_parameters: Optional[bool] = field(default=False, metadata={"help": "If set, print the parameters of " + "the model."}) + do_test: Optional[bool] = field(default=False, metadata={"help": "If set, evaluates the test performance."}) + split_validation_test: Optional[bool] = field(default=False, + metadata={"help": "If set, for the datasets which do not" + "have the test set, we use validation set as their" + "test set and make a validation set from either" + "splitting the validation set into half (for smaller" + "than 10K samples datasets), or by using 1K examples" + "from training set as validation set (for larger" + " datasets)."}) + compute_time: Optional[bool] = field(default=False, metadata={"help": "If set measures the time."}) + compute_memory: Optional[bool] = field(default=False, metadata={"help": "if set, measures the memory"}) + # prefix_length: Optional[int] = field(default=100, metadata={"help": "Defines the length for prefix tuning."}) + + + + +@dataclass +class DataTrainingArguments: + """ + Arguments pertaining to what data we are going to input our model for training and eval. 
+ """ + task_name: Optional[str] = field( + default=None, metadata={"help": "The name of the dataset to use (via the datasets library)."} + ) + dataset_config_name: Optional[str] = field( + default=None, metadata={"help": "The configuration name of the dataset to use (via the datasets library)."} + ) + eval_dataset_name: Optional[str] = field( + default=None, metadata={"help": "The name of the evaluation dataset to use (via the datasets library)."} + ) + eval_dataset_config_name: Optional[str] = field( + default=None, metadata={"help": "The configuration name of the evaluation dataset to use (via the datasets library)."} + ) + test_dataset_name: Optional[str] = field( + default=None, metadata={"help": "The name of the test dataset to use (via the datasets library)."} + ) + test_dataset_config_name: Optional[str] = field( + default=None, metadata={"help": "The configuration name of the test dataset to use (via the datasets library)."} + ) + overwrite_cache: bool = field( + default=False, metadata={"help": "Overwrite the cached training and evaluation sets"} + ) + preprocessing_num_workers: Optional[int] = field( + default=None, + metadata={"help": "The number of processes to use for the preprocessing."}, + ) + max_source_length: Optional[int] = field( + default=128, + metadata={ + "help": "The maximum total input sequence length after tokenization. Sequences longer " + "than this will be truncated, sequences shorter will be padded." + }, + ) + max_target_length: Optional[int] = field( + default=128, + metadata={ + "help": "The maximum total sequence length for target text after tokenization. Sequences longer " + "than this will be truncated, sequences shorter will be padded." + }, + ) + val_max_target_length: Optional[int] = field( + default=None, + metadata={ + "help": "The maximum total sequence length for validation target text after tokenization. Sequences longer " + "than this will be truncated, sequences shorter will be padded. Will default to `max_target_length`." + "This argument is also used to override the ``max_length`` param of ``model.generate``, which is used " + "during ``evaluate`` and ``predict``." + }, + ) + test_max_target_length: Optional[int] = field( + default=None, + metadata={ + "help": "The maximum total sequence length for test target text after tokenization. Sequences longer " + "than this will be truncated, sequences shorter will be padded. Will default to `max_target_length`." + "This argument is also used to override the ``max_length`` param of ``model.generate``, which is used " + "during ``evaluate`` and ``predict``." + }, + ) + pad_to_max_length: bool = field( + default=False, + metadata={ + "help": "Whether to pad all samples to model maximum sentence length. " + "If False, will pad the samples dynamically when batching to the maximum length in the batch. More " + "efficient on GPU but very bad for TPU." + }, + ) + max_train_samples: Optional[int] = field( + default=None, + metadata={ + "help": "For debugging purposes or quicker training, truncate the number of training examples to this " + "value if set." + }, + ) + max_val_samples: Optional[int] = field( + default=None, + metadata={ + "help": "For debugging purposes or quicker training, truncate the number of validation examples to this " + "value if set." 
+ }, + ) + max_test_samples: Optional[int] = field( + default=None, + metadata={"help": "For debugging purposes or quicker training, truncate the number of test examples to this " + "value if set."} + ) + num_beams: Optional[int] = field(default=None, metadata={"help": "Number of beams to use for evaluation."}) + ignore_pad_token_for_loss: bool = field( + default=True, + metadata={ + "help": "Whether to ignore the tokens corresponding to padded labels in the loss computation or not." + }, + ) + task_adapters: Optional[List[str]] = field( + default=None, + metadata={"help": "Defines a dictionary from task adapters to the tasks."} + ) + task_embeddings: Optional[List[str]] = field( + default=None, + metadata={"help": "Defines a dictionary from tasks to the tasks embeddings."} + ) + data_seed: Optional[int] = field(default=42, metadata={"help": "seed used to shuffle the data."}) + + model_parallel: Optional[bool] = field(default=False, metadata={"help": "whether apply model parallelization"}) + + def __post_init__(self): + if self.task_name is None: + raise ValueError("Need either a dataset name or a training/validation file.") + if self.val_max_target_length is None: + self.val_max_target_length = self.max_target_length + if self.test_max_target_length is None: + self.test_max_target_length = self.max_target_length diff --git a/examples/examples_seq2seq/trainers/trainer_utils.py b/examples/examples_seq2seq/trainers/trainer_utils.py new file mode 100644 index 0000000..3b4b917 --- /dev/null +++ b/examples/examples_seq2seq/trainers/trainer_utils.py @@ -0,0 +1,75 @@ +import numpy as np +from typing import Union, NamedTuple, Tuple, Dict, Any +import os +import regex as re +import logging +from dataclasses import fields +import torch.nn as nn +import json + + + + +logger = logging.getLogger(__name__) +logger.setLevel(logging.INFO) + +class EvalPrediction(NamedTuple): + """ + Evaluation output (always contains labels), to be used to compute metrics. + Parameters: + predictions (:obj:`np.ndarray`): Predictions of the model. + label_ids (:obj:`np.ndarray`): Targets to be matched. + data_info: (:obj:`Dict[str, Any]`): Extra dataset information, one requires + to performs the evaluation. The data_info is a dictionary with keys from + train, eval, test to specify the data_info for each split of the dataset. + """ + predictions: Union[np.ndarray, Tuple[np.ndarray]] + label_ids: np.ndarray + data_info: Dict[str, Any] + + + + + +def create_dir(output_dir): + """ + Checks whether to the output_dir already exists and creates it if not. + Args: + output_dir: path to the output_dir + """ + if not os.path.exists(output_dir): + os.makedirs(output_dir) + + +def get_last_checkpoint(output_dir): + if os.path.exists(os.path.join(output_dir, 'pytorch_model.bin')): + return output_dir + return None + + +def pad_punctuation(text): + """Re-implementation of _pad_punctuation in t5. This function adds spaces + around punctuation. While this pads punctuation as expected, it has the + unexpected effected of padding certain unicode characters with accents, with + spaces as well. For instance: "François" becomes "Fran ç ois""" + # Pad everything except for: underscores (_), whitespace (\s), + # numbers (\p{N}), letters (\p{L}) and accent characters (\p{M}). + text = re.sub(r'([^_\s\p{N}\p{L}\p{M}])', r' \1 ', text) + # Collapse consecutive whitespace into one space. 
+    text = re.sub(r'\s+', ' ', text)
+    return text
+
+def save_json(filepath, dictionary):
+    with open(filepath, "w") as outfile:
+        json.dump(dictionary, outfile)
+
+
+def read_json(filepath):
+    f = open(filepath,)
+    return json.load(f)
+
+
+def save_training_config(config_file, output_dir):
+    json_data = read_json(config_file)
+    save_json(os.path.join(output_dir, "training_config.json"), json_data)
+
diff --git a/examples/examples_seq2seq/utils/__init__.py b/examples/examples_seq2seq/utils/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/examples/examples_seq2seq/utils/utils.py b/examples/examples_seq2seq/utils/utils.py
new file mode 100644
index 0000000..74e3528
--- /dev/null
+++ b/examples/examples_seq2seq/utils/utils.py
@@ -0,0 +1,15 @@
+import os
+import regex as re
+import logging
+from dataclasses import fields
+import torch.nn as nn
+import json
+
+logger = logging.getLogger(__name__)
+logger.setLevel(logging.INFO)
+
+
+
+
+
+
diff --git a/examples/examples_text-classification/README.md b/examples/examples_text-classification/README.md
new file mode 100644
index 0000000..0d16da5
--- /dev/null
+++ b/examples/examples_text-classification/README.md
@@ -0,0 +1,58 @@
+# Text classification with OpenDelta
+This directory contains examples that use OpenDelta for text classification in the traditional classification mode, i.e., with a classification head on top of the language model. Almost all of the training pipeline code remains the same, except for some minimal changes to insert delta models into the backbone model.
+
+
+## Generating the json configuration file
+
+```
+python config_gen.py --job $job_name
+```
+The available job configurations (e.g., `--job lora_roberta-base`) are listed in `config_gen.py`. You can also
+create your own configuration.
+
+
+## Run the code
+
+```
+python run_glue.py configs/$job_name/$dataset.json
+```
+
+
+## Possible Errors
+
+1.
+```
+ValueError: You must login to the Hugging Face hub on this computer by typing `transformers-cli login` and entering your credentials to use `use_auth_token=True`. Alternatively, you can pass your own token as the `use_auth_token` argument.
+```
+- Solution 1: Register an account on [HuggingFace](https://huggingface.co/), then run `transformers-cli login` on your command line and enter your username and password.
+
+- Solution 2: Disable pushing to the hub by setting `"push_to_hub": false` in the config json.
+
+2.
+```
+OSError: Looks like you do not have git-lfs installed, please install. You can install from https://git-lfs.github.com/. Then run `git lfs install` (you only have to do this once).
+```
+
+- Solution 1:
+```
+wget -P ~ https://github.com/git-lfs/git-lfs/releases/download/v3.0.2/git-lfs-linux-amd64-v3.0.2.tar.gz
+cd ~
+tar -xvzf git-lfs-linux-amd64-v3.0.2.tar.gz
+export PATH=~:$PATH # a temporary fix; to add it permanently, modify your shell profile (e.g., ~/.bashrc)
+git-lfs install
+```
+
+- Solution 2: Disable pushing to the hub by setting `"push_to_hub": false` in the config json.
+
+3. Dataset connection error
+
+- Solution 1: Open a python console and re-run the failing command; this may or may not help.
+
+- Solution 2: Download the dataset on an internet-connected machine, save it to disk, transfer it to your server, and finally load it with `load_from_disk`.
+
+
+## Link to the original training scripts
+This example directory is based on the [huggingface text-classification example](https://github.com/huggingface/transformers/tree/master/examples/pytorch/text-classification). Thanks to the authors of the original repo.
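+
+## How a generated configuration is consumed (sketch)
+
+Each json produced by `config_gen.py` is a flat dictionary of training, data and delta arguments. The snippet below is a minimal sketch of how such a file can be parsed into argument dataclasses; the actual entry point is `run_glue.py`, and the exact dataclasses it uses (e.g. the delta-specific ones) may differ, so treat the class list here as an assumption.
+
+```python
+# Hedged sketch: parse a generated json config into HuggingFace argument dataclasses.
+import sys
+from transformers import HfArgumentParser, TrainingArguments
+
+# Only TrainingArguments is used in this sketch; the real script also defines model/data/delta
+# argument dataclasses. Depending on the transformers version, keys that match no dataclass
+# are either ignored or must be routed to those additional dataclasses.
+parser = HfArgumentParser((TrainingArguments,))
+(training_args,) = parser.parse_json_file(json_file=sys.argv[1])
+print(training_args.learning_rate, training_args.num_train_epochs)
+```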
diff --git a/examples/examples_text-classification/configs/config_gen.py b/examples/examples_text-classification/configs/config_gen.py new file mode 100644 index 0000000..14101ce --- /dev/null +++ b/examples/examples_text-classification/configs/config_gen.py @@ -0,0 +1,342 @@ +import collections +import copy + +AllConfigs = {} + +BaseConfigs = {} +BaseConfigs['roberta-base'] = { + ("job_name", "task_name", "eval_dataset_name", "test_dataset_name", "num_train_epochs", + "max_source_length", + "per_device_train_batch_size", "per_device_eval_batch_size", "warmup_steps","save_steps", "eval_steps", "metric_for_best_model"): zip( + ["superglue-boolq", "superglue-cb", "superglue-copa", "superglue-wic", "superglue-multirc", "superglue-record", + "superglue-wsc.fixed", "mrpc", "cola", "sst2", "qnli", "rte", "mnli", "qqp", "stsb"], + ["superglue-boolq", "superglue-cb", "superglue-copa", "superglue-wic", "superglue-multirc", "superglue-record", "superglue-wsc.fixed", "mrpc", "cola", "sst2", "qnli", "rte", "mnli", "qqp", "stsb"], + ["superglue-boolq", "superglue-cb", "superglue-copa", "superglue-wic", "superglue-multirc", "superglue-record", "superglue-wsc.fixed", "mrpc", "cola", "sst2", "qnli", "rte", "mnli", "qqp", "stsb"], + ["superglue-boolq", "superglue-cb", "superglue-copa", "superglue-wic", "superglue-multirc", "superglue-record", "superglue-wsc.fixed", "mrpc", "cola", "sst2", "qnli", "rte", "mnli", "qqp", "stsb"], + [ 20, 20, 40, 20, 3, 3, 20, 20, 20, 3, 3, 20, 3, 3, 20], + [256, 256, 256, 256, 256, 512, 256, 128, 128, 128, 128, 128, 128, 128, 128], + [ 32, 32, 32, 32, 32, 16, 32] + [32] * 8, + [ 32, 32, 32, 32, 32, 16, 32] + [32] * 8, + [0] *7 +[0] *8, + [200, 100, 50, 100, 200, 200, 100, 200, 100, 200, 200, 100, 200, 200, 100], + [200, 100, 50, 100, 200, 200, 100, 200, 100, 200, 200, 100, 200, 200, 100], + ["eval_accuracy"] *15, + ), + "do_train": True, + "do_eval": True, + "do_test": True, + + "model_name_or_path": "roberta-base", + "tokenizer_name": "roberta-base", + "save_total_limit": 1, + # For glue datasets. + # "split_validation_test": True, + "seed": 42, + "dataset_config_name": ["en"], + "eval_dataset_config_name": ["en"], + "test_dataset_config_name": ["en"], + # other configurations. + "predict_with_generate": True, + # To evaluate during training. 
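+    # The keys below turn on step-based evaluation and keep the best checkpoint according to
+    # "metric_for_best_model", which is supplied per task through the zipped tuple above.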
+ "load_best_model_at_end": True, + # "metric_for_best_model": "average_metrics", + "greater_is_better": True, + "evaluation_strategy": "steps", + "overwrite_output_dir": True, + "push_to_hub": True, + "save_strategy": "steps" + } + + +BaseConfigs['deberta-base'] = { + ("job_name", "task_name", "eval_dataset_name", "test_dataset_name", "num_train_epochs", + "max_source_length", + "per_device_train_batch_size", "per_device_eval_batch_size", "warmup_steps","save_steps", "eval_steps", "metric_for_best_model"): zip( + ["superglue-boolq", "superglue-cb", "superglue-copa", "superglue-wic", "superglue-multirc", "superglue-record", + "superglue-wsc.fixed", "mrpc", "cola", "sst2", "qnli", "rte", "mnli", "qqp", "stsb"], + ["superglue-boolq", "superglue-cb", "superglue-copa", "superglue-wic", "superglue-multirc", "superglue-record", "superglue-wsc.fixed", "mrpc", "cola", "sst2", "qnli", "rte", "mnli", "qqp", "stsb"], + ["superglue-boolq", "superglue-cb", "superglue-copa", "superglue-wic", "superglue-multirc", "superglue-record", "superglue-wsc.fixed", "mrpc", "cola", "sst2", "qnli", "rte", "mnli", "qqp", "stsb"], + ["superglue-boolq", "superglue-cb", "superglue-copa", "superglue-wic", "superglue-multirc", "superglue-record", "superglue-wsc.fixed", "mrpc", "cola", "sst2", "qnli", "rte", "mnli", "qqp", "stsb"], + [ 20, 20, 40, 20, 3, 3, 20, 20, 20, 3, 3, 20, 3, 3, 20], + [256, 256, 256, 256, 256, 512, 256, 128, 128, 128, 128, 128, 128, 128, 128], + [ 32, 32, 32, 32, 32, 16, 32] + [32] * 8, + [ 32, 32, 32, 32, 32, 16, 32] + [32] * 8, + [0] *7 +[0] *8, + [200, 100, 50, 100, 200, 200, 100, 200, 100, 200, 200, 100, 200, 200, 100], + [200, 100, 50, 100, 200, 200, 100, 200, 100, 200, 200, 100, 200, 200, 100], + ["eval_accuracy"] *15, + ), + "do_train": True, + "do_eval": True, + "do_test": True, + + "model_name_or_path": "microsoft/deberta-v3-base", + "tokenizer_name": "microsoft/deberta-v3-base", + "save_total_limit": 1, + # For glue datasets. + # "split_validation_test": True, + "seed": 42, + "dataset_config_name": ["en"], + "eval_dataset_config_name": ["en"], + "test_dataset_config_name": ["en"], + # other configurations. + "predict_with_generate": True, + # To evaluate during training. 
+ "load_best_model_at_end": True, + # "metric_for_best_model": "average_metrics", + "greater_is_better": True, + "evaluation_strategy": "steps", + "overwrite_output_dir": True, + "push_to_hub": True, + "save_strategy": "steps" + } + +BaseConfigs['deberta-v2-xlarge'] = { + ("job_name", "task_name", "eval_dataset_name", "test_dataset_name", "num_train_epochs", + "max_source_length", + "per_device_train_batch_size", "per_device_eval_batch_size", "warmup_steps","save_steps", "eval_steps", "metric_for_best_model", "gradient_accumulation_steps"): zip( + ["superglue-boolq", "superglue-cb", "superglue-copa", "superglue-wic", "superglue-multirc", "superglue-record", + "superglue-wsc.fixed", "mrpc", "cola", "sst2", "qnli", "rte", "mnli", "qqp", "stsb"], + ["superglue-boolq", "superglue-cb", "superglue-copa", "superglue-wic", "superglue-multirc", "superglue-record", "superglue-wsc.fixed", "mrpc", "cola", "sst2", "qnli", "rte", "mnli", "qqp", "stsb"], + ["superglue-boolq", "superglue-cb", "superglue-copa", "superglue-wic", "superglue-multirc", "superglue-record", "superglue-wsc.fixed", "mrpc", "cola", "sst2", "qnli", "rte", "mnli", "qqp", "stsb"], + ["superglue-boolq", "superglue-cb", "superglue-copa", "superglue-wic", "superglue-multirc", "superglue-record", "superglue-wsc.fixed", "mrpc", "cola", "sst2", "qnli", "rte", "mnli", "qqp", "stsb"], + [ 20, 20, 40, 20, 3, 3, 20, 20, 20, 3, 3, 20, 3, 3, 20], + [256, 256, 256, 256, 256, 512, 256, 128, 128, 128, 128, 128, 128, 128, 128], + [ 16, 16, 16, 16, 16, 8, 16] + [16] * 8, + [ 16, 16, 16, 16, 16, 8, 16] + [16] * 8, + [0] *7 +[0] *8, + [200, 100, 50, 100, 200, 200, 100, 200, 100, 200, 200, 100, 200, 200, 100], + [200, 100, 50, 100, 200, 200, 100, 200, 100, 200, 200, 100, 200, 200, 100], + ["eval_accuracy"] *15, + [4] *15, + ), + "do_train": True, + "do_eval": True, + "do_test": True, + + "model_name_or_path": "microsoft/deberta-v2-xlarge", + "tokenizer_name": "microsoft/deberta-v2-xlarge", + "save_total_limit": 1, + # For glue datasets. + # "split_validation_test": True, + "seed": 42, + "dataset_config_name": ["en"], + "eval_dataset_config_name": ["en"], + "test_dataset_config_name": ["en"], + # other configurations. + "predict_with_generate": True, + # To evaluate during training. 
+ "load_best_model_at_end": True, + # "metric_for_best_model": "average_metrics", + "greater_is_better": True, + "evaluation_strategy": "steps", + "overwrite_output_dir": True, + "push_to_hub": True, + "save_strategy": "steps" + } + + +AllConfigs['bitfit_roberta-base'] = copy.deepcopy(BaseConfigs['roberta-base']) +AllConfigs['bitfit_roberta-base'].update({ + "delta_type": "bitfit", + "learning_rate": 3e-4, + "output_dir": "outputs/bitfit/roberta-base/", + "unfrozen_modules": [ + "classifier", + "deltas" + ], + }) + +AllConfigs['adapter_roberta-base'] = copy.deepcopy(BaseConfigs['roberta-base']) +AllConfigs['adapter_roberta-base'].update({ + "delta_type": "adapter", + "learning_rate": 3e-4, + "unfrozen_modules": [ + "deltas", + "layer_norm", + "final_layer_norm", + "classifier", + ], + "bottleneck_dim":24, + "output_dir": "outputs/adapter/roberta-base/", + }) + +AllConfigs['lora_roberta-base'] = copy.deepcopy(BaseConfigs['roberta-base']) +AllConfigs['lora_roberta-base'].update({ + "delta_type": "lora", + "learning_rate": 3e-4, + "unfrozen_modules": [ + "deltas", + "layer_norm", + "final_layer_norm", + "classifier", + ], + "lora_r": 8, + "output_dir": "outputs/lora/roberta-base/", + }) + +AllConfigs['compacter_roberta-base'] = copy.deepcopy(BaseConfigs['roberta-base']) +AllConfigs['compacter_roberta-base'].update({ + "delta_type": "compacter", + "learning_rate": 3e-3, + "unfrozen_modules": [ + "deltas", + "layer_norm", + "final_layer_norm", + "classifier", + ], + "output_dir": "outputs/compacter/roberta-base/", + "non_linearity": "gelu_new", + + #Compacter. + "hypercomplex_division": 4, + "hypercomplex_adapters": True, + "hypercomplex_nonlinearity": "glorot-uniform", + # gradient clip and clamp + "gradient_clip": False, + "phm_clamp": False, + "normalize_phm_weight": False, + "learn_phm": True, + # shared one side + "factorized_phm": True, + "shared_phm_rule": False, + "factorized_phm_rule": False, + "phm_c_init": "normal", + "phm_init_range": 0.0001, + "use_bias_down_sampler": True, + "use_bias_up_sampler": True, + }) + +AllConfigs['compacter++_roberta-base'] = copy.deepcopy(BaseConfigs['roberta-base']) +AllConfigs['compacter++_roberta-base'].update({ + "delta_type": "compacter", + "learning_rate": 3e-3, + "do_train": True, + "do_eval": True, + "do_test": True, + "modified_modules": [ + "DenseReluDense" + ], + "unfrozen_modules": [ + "deltas", + "layer_norm", + "final_layer_norm", + "classifier", + ], + "output_dir": "outputs/compacter++/roberta-base/", + "non_linearity": "gelu_new", + + #Compacter. 
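+    # Rough meaning of the PHM hyperparameters below (following the Compacter paper):
+    # "hypercomplex_division" is the number of Kronecker summands n; "factorized_phm" low-rank
+    # factorizes the per-layer matrices; "shared_phm_rule"/"factorized_phm_rule" control whether
+    # the small "rule" matrices are shared across layers and themselves factorized.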
+ "hypercomplex_division": 4, + "hypercomplex_adapters": True, + "hypercomplex_nonlinearity": "glorot-uniform", + # gradient clip and clamp + "gradient_clip": False, + "phm_clamp": False, + "normalize_phm_weight": False, + "learn_phm": True, + # shared one side + "factorized_phm": True, + "shared_phm_rule": False, + "factorized_phm_rule": False, + "phm_c_init": "normal", + "phm_init_range": 0.0001, + "use_bias_down_sampler": True, + "use_bias_up_sampler": True, + }) + + +AllConfigs['low_rank_adapter_roberta-base'] = copy.deepcopy(BaseConfigs['roberta-base']) +AllConfigs['low_rank_adapter_roberta-base'].update({ + "delta_type": "low_rank_adapter", + "learning_rate": 3e-4, + "unfrozen_modules": [ + "deltas", + "layer_norm", + "final_layer_norm", + "classifier", + ], + "output_dir": "outputs/low_rank_adapter/roberta-base/", + "non_linearity": "gelu_new", + "low_rank_w_init": "glorot-uniform", + "low_rank_rank": 1, + }) + + +AllConfigs['soft_prompt_roberta-base'] = copy.deepcopy(BaseConfigs['roberta-base']) +AllConfigs['soft_prompt_roberta-base'].update({ + "delta_type": "soft_prompt", + "learning_rate": 3e-2, + "soft_token_num":100, + "unfrozen_modules": [ + "deltas", + "classifier", + ], + "output_dir": "outputs/soft_prompt/roberta-base/", + }) + +AllConfigs['prefix_roberta-base'] = copy.deepcopy(BaseConfigs['roberta-base']) +AllConfigs['prefix_roberta-base'].update({ + "delta_type": "prefix", + "learning_rate": 3e-4, + "unfrozen_modules": [ + "deltas", + "classifier", + ], + "output_dir": "outputs/prefix/roberta-base/", + }) + +AllConfigs['soft_prompt_deberta-v2-xlarge'] = copy.deepcopy(BaseConfigs['deberta-v2-xlarge']) +AllConfigs['soft_prompt_deberta-v2-xlarge'].update({ + "delta_type": "soft_prompt", + "learning_rate": 3e-2, + "soft_token_num":100, + "unfrozen_modules": [ + "deltas", + "classifier", + ], + "output_dir": "outputs/soft_prompt/deberta-v2-xlarge/", + }) + + +if __name__ == "__main__": + import argparse + import json + import os + parser = argparse.ArgumentParser("Parser to generate configuration") + parser.add_argument("--job", type=str) + args = parser.parse_args() + + config = AllConfigs[args.job] + + Cartesian_product = [] + for key in config: + if isinstance(key, tuple): + Cartesian_product.append(key) + all_config_jsons = {} + for key_tuple in Cartesian_product: + for zipped in config[key_tuple]: + job_name = zipped[0] + all_config_jsons[job_name] = {} + for key_name, zipped_elem in zip(key_tuple, zipped): + if key_name != 'job_name': + all_config_jsons[job_name][key_name] = zipped_elem + for key in config: + if not isinstance(key, tuple): + for job_name in all_config_jsons: + if key == "output_dir": + all_config_jsons[job_name][key] = config[key] + job_name + else: + all_config_jsons[job_name][key] = config[key] + + + if not os.path.exists(f"./{args.job}/"): + os.mkdir(f"./{args.job}/") + + for job_name in all_config_jsons: + with open(f"./{args.job}/{job_name}.json", 'w') as fout: + json.dump(all_config_jsons[job_name], fout, indent=4,sort_keys=True) + + + + \ No newline at end of file diff --git a/examples/examples_text-classification/configs/prefix_roberta-base/cola.json b/examples/examples_text-classification/configs/prefix_roberta-base/cola.json new file mode 100644 index 0000000..eafe735 --- /dev/null +++ b/examples/examples_text-classification/configs/prefix_roberta-base/cola.json @@ -0,0 +1,43 @@ +{ + "dataset_config_name": [ + "en" + ], + "delta_type": "prefix", + "do_eval": true, + "do_test": true, + "do_train": true, + "eval_dataset_config_name": [ + "en" + 
], + "eval_dataset_name": "cola", + "eval_steps": 100, + "evaluation_strategy": "steps", + "greater_is_better": true, + "learning_rate": 0.0003, + "load_best_model_at_end": true, + "max_source_length": 128, + "metric_for_best_model": "eval_accuracy", + "model_name_or_path": "roberta-base", + "num_train_epochs": 20, + "output_dir": "outputs/prefix/roberta-base/cola", + "overwrite_output_dir": true, + "per_device_eval_batch_size": 32, + "per_device_train_batch_size": 32, + "predict_with_generate": true, + "push_to_hub": true, + "save_steps": 100, + "save_strategy": "steps", + "save_total_limit": 1, + "seed": 42, + "task_name": "cola", + "test_dataset_config_name": [ + "en" + ], + "test_dataset_name": "cola", + "tokenizer_name": "roberta-base", + "unfrozen_modules": [ + "deltas", + "classifier" + ], + "warmup_steps": 0 +} \ No newline at end of file diff --git a/examples/examples_text-classification/configs/prefix_roberta-base/mnli.json b/examples/examples_text-classification/configs/prefix_roberta-base/mnli.json new file mode 100644 index 0000000..d134b54 --- /dev/null +++ b/examples/examples_text-classification/configs/prefix_roberta-base/mnli.json @@ -0,0 +1,43 @@ +{ + "dataset_config_name": [ + "en" + ], + "delta_type": "prefix", + "do_eval": true, + "do_test": true, + "do_train": true, + "eval_dataset_config_name": [ + "en" + ], + "eval_dataset_name": "mnli", + "eval_steps": 200, + "evaluation_strategy": "steps", + "greater_is_better": true, + "learning_rate": 0.0003, + "load_best_model_at_end": true, + "max_source_length": 128, + "metric_for_best_model": "eval_accuracy", + "model_name_or_path": "roberta-base", + "num_train_epochs": 3, + "output_dir": "outputs/prefix/roberta-base/mnli", + "overwrite_output_dir": true, + "per_device_eval_batch_size": 32, + "per_device_train_batch_size": 32, + "predict_with_generate": true, + "push_to_hub": true, + "save_steps": 200, + "save_strategy": "steps", + "save_total_limit": 1, + "seed": 42, + "task_name": "mnli", + "test_dataset_config_name": [ + "en" + ], + "test_dataset_name": "mnli", + "tokenizer_name": "roberta-base", + "unfrozen_modules": [ + "deltas", + "classifier" + ], + "warmup_steps": 0 +} \ No newline at end of file diff --git a/examples/examples_text-classification/configs/prefix_roberta-base/mrpc.json b/examples/examples_text-classification/configs/prefix_roberta-base/mrpc.json new file mode 100644 index 0000000..8a96090 --- /dev/null +++ b/examples/examples_text-classification/configs/prefix_roberta-base/mrpc.json @@ -0,0 +1,43 @@ +{ + "dataset_config_name": [ + "en" + ], + "delta_type": "prefix", + "do_eval": true, + "do_test": true, + "do_train": true, + "eval_dataset_config_name": [ + "en" + ], + "eval_dataset_name": "mrpc", + "eval_steps": 200, + "evaluation_strategy": "steps", + "greater_is_better": true, + "learning_rate": 0.0003, + "load_best_model_at_end": true, + "max_source_length": 128, + "metric_for_best_model": "eval_accuracy", + "model_name_or_path": "roberta-base", + "num_train_epochs": 20, + "output_dir": "outputs/prefix/roberta-base/mrpc", + "overwrite_output_dir": true, + "per_device_eval_batch_size": 32, + "per_device_train_batch_size": 32, + "predict_with_generate": true, + "push_to_hub": true, + "save_steps": 200, + "save_strategy": "steps", + "save_total_limit": 1, + "seed": 42, + "task_name": "mrpc", + "test_dataset_config_name": [ + "en" + ], + "test_dataset_name": "mrpc", + "tokenizer_name": "roberta-base", + "unfrozen_modules": [ + "deltas", + "classifier" + ], + "warmup_steps": 0 +} \ No newline at end of 
file diff --git a/examples/examples_text-classification/configs/prefix_roberta-base/qnli.json b/examples/examples_text-classification/configs/prefix_roberta-base/qnli.json new file mode 100644 index 0000000..3e5142e --- /dev/null +++ b/examples/examples_text-classification/configs/prefix_roberta-base/qnli.json @@ -0,0 +1,43 @@ +{ + "dataset_config_name": [ + "en" + ], + "delta_type": "prefix", + "do_eval": true, + "do_test": true, + "do_train": true, + "eval_dataset_config_name": [ + "en" + ], + "eval_dataset_name": "qnli", + "eval_steps": 200, + "evaluation_strategy": "steps", + "greater_is_better": true, + "learning_rate": 0.0003, + "load_best_model_at_end": true, + "max_source_length": 128, + "metric_for_best_model": "eval_accuracy", + "model_name_or_path": "roberta-base", + "num_train_epochs": 3, + "output_dir": "outputs/prefix/roberta-base/qnli", + "overwrite_output_dir": true, + "per_device_eval_batch_size": 32, + "per_device_train_batch_size": 32, + "predict_with_generate": true, + "push_to_hub": true, + "save_steps": 200, + "save_strategy": "steps", + "save_total_limit": 1, + "seed": 42, + "task_name": "qnli", + "test_dataset_config_name": [ + "en" + ], + "test_dataset_name": "qnli", + "tokenizer_name": "roberta-base", + "unfrozen_modules": [ + "deltas", + "classifier" + ], + "warmup_steps": 0 +} \ No newline at end of file diff --git a/examples/examples_text-classification/configs/prefix_roberta-base/qqp.json b/examples/examples_text-classification/configs/prefix_roberta-base/qqp.json new file mode 100644 index 0000000..d36f69a --- /dev/null +++ b/examples/examples_text-classification/configs/prefix_roberta-base/qqp.json @@ -0,0 +1,43 @@ +{ + "dataset_config_name": [ + "en" + ], + "delta_type": "prefix", + "do_eval": true, + "do_test": true, + "do_train": true, + "eval_dataset_config_name": [ + "en" + ], + "eval_dataset_name": "qqp", + "eval_steps": 200, + "evaluation_strategy": "steps", + "greater_is_better": true, + "learning_rate": 0.0003, + "load_best_model_at_end": true, + "max_source_length": 128, + "metric_for_best_model": "eval_accuracy", + "model_name_or_path": "roberta-base", + "num_train_epochs": 3, + "output_dir": "outputs/prefix/roberta-base/qqp", + "overwrite_output_dir": true, + "per_device_eval_batch_size": 32, + "per_device_train_batch_size": 32, + "predict_with_generate": true, + "push_to_hub": true, + "save_steps": 200, + "save_strategy": "steps", + "save_total_limit": 1, + "seed": 42, + "task_name": "qqp", + "test_dataset_config_name": [ + "en" + ], + "test_dataset_name": "qqp", + "tokenizer_name": "roberta-base", + "unfrozen_modules": [ + "deltas", + "classifier" + ], + "warmup_steps": 0 +} \ No newline at end of file diff --git a/examples/examples_text-classification/configs/prefix_roberta-base/rte.json b/examples/examples_text-classification/configs/prefix_roberta-base/rte.json new file mode 100644 index 0000000..d6c8470 --- /dev/null +++ b/examples/examples_text-classification/configs/prefix_roberta-base/rte.json @@ -0,0 +1,43 @@ +{ + "dataset_config_name": [ + "en" + ], + "delta_type": "prefix", + "do_eval": true, + "do_test": true, + "do_train": true, + "eval_dataset_config_name": [ + "en" + ], + "eval_dataset_name": "rte", + "eval_steps": 100, + "evaluation_strategy": "steps", + "greater_is_better": true, + "learning_rate": 0.0003, + "load_best_model_at_end": true, + "max_source_length": 128, + "metric_for_best_model": "eval_accuracy", + "model_name_or_path": "roberta-base", + "num_train_epochs": 20, + "output_dir": "outputs/prefix/roberta-base/rte", + 
"overwrite_output_dir": true, + "per_device_eval_batch_size": 32, + "per_device_train_batch_size": 32, + "predict_with_generate": true, + "push_to_hub": true, + "save_steps": 100, + "save_strategy": "steps", + "save_total_limit": 1, + "seed": 42, + "task_name": "rte", + "test_dataset_config_name": [ + "en" + ], + "test_dataset_name": "rte", + "tokenizer_name": "roberta-base", + "unfrozen_modules": [ + "deltas", + "classifier" + ], + "warmup_steps": 0 +} \ No newline at end of file diff --git a/examples/examples_text-classification/configs/prefix_roberta-base/sst2.json b/examples/examples_text-classification/configs/prefix_roberta-base/sst2.json new file mode 100644 index 0000000..a583cce --- /dev/null +++ b/examples/examples_text-classification/configs/prefix_roberta-base/sst2.json @@ -0,0 +1,43 @@ +{ + "dataset_config_name": [ + "en" + ], + "delta_type": "prefix", + "do_eval": true, + "do_test": true, + "do_train": true, + "eval_dataset_config_name": [ + "en" + ], + "eval_dataset_name": "sst2", + "eval_steps": 200, + "evaluation_strategy": "steps", + "greater_is_better": true, + "learning_rate": 0.0003, + "load_best_model_at_end": true, + "max_source_length": 128, + "metric_for_best_model": "eval_accuracy", + "model_name_or_path": "roberta-base", + "num_train_epochs": 3, + "output_dir": "outputs/prefix/roberta-base/sst2", + "overwrite_output_dir": true, + "per_device_eval_batch_size": 32, + "per_device_train_batch_size": 32, + "predict_with_generate": true, + "push_to_hub": true, + "save_steps": 200, + "save_strategy": "steps", + "save_total_limit": 1, + "seed": 42, + "task_name": "sst2", + "test_dataset_config_name": [ + "en" + ], + "test_dataset_name": "sst2", + "tokenizer_name": "roberta-base", + "unfrozen_modules": [ + "deltas", + "classifier" + ], + "warmup_steps": 0 +} \ No newline at end of file diff --git a/examples/examples_text-classification/configs/prefix_roberta-base/stsb.json b/examples/examples_text-classification/configs/prefix_roberta-base/stsb.json new file mode 100644 index 0000000..63dd100 --- /dev/null +++ b/examples/examples_text-classification/configs/prefix_roberta-base/stsb.json @@ -0,0 +1,43 @@ +{ + "dataset_config_name": [ + "en" + ], + "delta_type": "prefix", + "do_eval": true, + "do_test": true, + "do_train": true, + "eval_dataset_config_name": [ + "en" + ], + "eval_dataset_name": "stsb", + "eval_steps": 100, + "evaluation_strategy": "steps", + "greater_is_better": true, + "learning_rate": 0.0003, + "load_best_model_at_end": true, + "max_source_length": 128, + "metric_for_best_model": "eval_accuracy", + "model_name_or_path": "roberta-base", + "num_train_epochs": 20, + "output_dir": "outputs/prefix/roberta-base/stsb", + "overwrite_output_dir": true, + "per_device_eval_batch_size": 32, + "per_device_train_batch_size": 32, + "predict_with_generate": true, + "push_to_hub": true, + "save_steps": 100, + "save_strategy": "steps", + "save_total_limit": 1, + "seed": 42, + "task_name": "stsb", + "test_dataset_config_name": [ + "en" + ], + "test_dataset_name": "stsb", + "tokenizer_name": "roberta-base", + "unfrozen_modules": [ + "deltas", + "classifier" + ], + "warmup_steps": 0 +} \ No newline at end of file diff --git a/examples/examples_text-classification/configs/prefix_roberta-base/superglue-boolq.json b/examples/examples_text-classification/configs/prefix_roberta-base/superglue-boolq.json new file mode 100644 index 0000000..013892a --- /dev/null +++ b/examples/examples_text-classification/configs/prefix_roberta-base/superglue-boolq.json @@ -0,0 +1,43 @@ +{ + 
"dataset_config_name": [ + "en" + ], + "delta_type": "prefix", + "do_eval": true, + "do_test": true, + "do_train": true, + "eval_dataset_config_name": [ + "en" + ], + "eval_dataset_name": "superglue-boolq", + "eval_steps": 200, + "evaluation_strategy": "steps", + "greater_is_better": true, + "learning_rate": 0.0003, + "load_best_model_at_end": true, + "max_source_length": 256, + "metric_for_best_model": "eval_accuracy", + "model_name_or_path": "roberta-base", + "num_train_epochs": 20, + "output_dir": "outputs/prefix/roberta-base/superglue-boolq", + "overwrite_output_dir": true, + "per_device_eval_batch_size": 32, + "per_device_train_batch_size": 32, + "predict_with_generate": true, + "push_to_hub": true, + "save_steps": 200, + "save_strategy": "steps", + "save_total_limit": 1, + "seed": 42, + "task_name": "superglue-boolq", + "test_dataset_config_name": [ + "en" + ], + "test_dataset_name": "superglue-boolq", + "tokenizer_name": "roberta-base", + "unfrozen_modules": [ + "deltas", + "classifier" + ], + "warmup_steps": 0 +} \ No newline at end of file diff --git a/examples/examples_text-classification/configs/prefix_roberta-base/superglue-cb.json b/examples/examples_text-classification/configs/prefix_roberta-base/superglue-cb.json new file mode 100644 index 0000000..4513356 --- /dev/null +++ b/examples/examples_text-classification/configs/prefix_roberta-base/superglue-cb.json @@ -0,0 +1,43 @@ +{ + "dataset_config_name": [ + "en" + ], + "delta_type": "prefix", + "do_eval": true, + "do_test": true, + "do_train": true, + "eval_dataset_config_name": [ + "en" + ], + "eval_dataset_name": "superglue-cb", + "eval_steps": 100, + "evaluation_strategy": "steps", + "greater_is_better": true, + "learning_rate": 0.0003, + "load_best_model_at_end": true, + "max_source_length": 256, + "metric_for_best_model": "eval_accuracy", + "model_name_or_path": "roberta-base", + "num_train_epochs": 20, + "output_dir": "outputs/prefix/roberta-base/superglue-cb", + "overwrite_output_dir": true, + "per_device_eval_batch_size": 32, + "per_device_train_batch_size": 32, + "predict_with_generate": true, + "push_to_hub": true, + "save_steps": 100, + "save_strategy": "steps", + "save_total_limit": 1, + "seed": 42, + "task_name": "superglue-cb", + "test_dataset_config_name": [ + "en" + ], + "test_dataset_name": "superglue-cb", + "tokenizer_name": "roberta-base", + "unfrozen_modules": [ + "deltas", + "classifier" + ], + "warmup_steps": 0 +} \ No newline at end of file diff --git a/examples/examples_text-classification/configs/prefix_roberta-base/superglue-copa.json b/examples/examples_text-classification/configs/prefix_roberta-base/superglue-copa.json new file mode 100644 index 0000000..59d4f70 --- /dev/null +++ b/examples/examples_text-classification/configs/prefix_roberta-base/superglue-copa.json @@ -0,0 +1,43 @@ +{ + "dataset_config_name": [ + "en" + ], + "delta_type": "prefix", + "do_eval": true, + "do_test": true, + "do_train": true, + "eval_dataset_config_name": [ + "en" + ], + "eval_dataset_name": "superglue-copa", + "eval_steps": 50, + "evaluation_strategy": "steps", + "greater_is_better": true, + "learning_rate": 0.0003, + "load_best_model_at_end": true, + "max_source_length": 256, + "metric_for_best_model": "eval_accuracy", + "model_name_or_path": "roberta-base", + "num_train_epochs": 40, + "output_dir": "outputs/prefix/roberta-base/superglue-copa", + "overwrite_output_dir": true, + "per_device_eval_batch_size": 32, + "per_device_train_batch_size": 32, + "predict_with_generate": true, + "push_to_hub": true, + 
"save_steps": 50, + "save_strategy": "steps", + "save_total_limit": 1, + "seed": 42, + "task_name": "superglue-copa", + "test_dataset_config_name": [ + "en" + ], + "test_dataset_name": "superglue-copa", + "tokenizer_name": "roberta-base", + "unfrozen_modules": [ + "deltas", + "classifier" + ], + "warmup_steps": 0 +} \ No newline at end of file diff --git a/examples/examples_text-classification/configs/prefix_roberta-base/superglue-multirc.json b/examples/examples_text-classification/configs/prefix_roberta-base/superglue-multirc.json new file mode 100644 index 0000000..3ea0c77 --- /dev/null +++ b/examples/examples_text-classification/configs/prefix_roberta-base/superglue-multirc.json @@ -0,0 +1,43 @@ +{ + "dataset_config_name": [ + "en" + ], + "delta_type": "prefix", + "do_eval": true, + "do_test": true, + "do_train": true, + "eval_dataset_config_name": [ + "en" + ], + "eval_dataset_name": "superglue-multirc", + "eval_steps": 200, + "evaluation_strategy": "steps", + "greater_is_better": true, + "learning_rate": 0.0003, + "load_best_model_at_end": true, + "max_source_length": 256, + "metric_for_best_model": "eval_accuracy", + "model_name_or_path": "roberta-base", + "num_train_epochs": 3, + "output_dir": "outputs/prefix/roberta-base/superglue-multirc", + "overwrite_output_dir": true, + "per_device_eval_batch_size": 32, + "per_device_train_batch_size": 32, + "predict_with_generate": true, + "push_to_hub": true, + "save_steps": 200, + "save_strategy": "steps", + "save_total_limit": 1, + "seed": 42, + "task_name": "superglue-multirc", + "test_dataset_config_name": [ + "en" + ], + "test_dataset_name": "superglue-multirc", + "tokenizer_name": "roberta-base", + "unfrozen_modules": [ + "deltas", + "classifier" + ], + "warmup_steps": 0 +} \ No newline at end of file diff --git a/examples/examples_text-classification/configs/prefix_roberta-base/superglue-record.json b/examples/examples_text-classification/configs/prefix_roberta-base/superglue-record.json new file mode 100644 index 0000000..1251019 --- /dev/null +++ b/examples/examples_text-classification/configs/prefix_roberta-base/superglue-record.json @@ -0,0 +1,43 @@ +{ + "dataset_config_name": [ + "en" + ], + "delta_type": "prefix", + "do_eval": true, + "do_test": true, + "do_train": true, + "eval_dataset_config_name": [ + "en" + ], + "eval_dataset_name": "superglue-record", + "eval_steps": 200, + "evaluation_strategy": "steps", + "greater_is_better": true, + "learning_rate": 0.0003, + "load_best_model_at_end": true, + "max_source_length": 512, + "metric_for_best_model": "eval_accuracy", + "model_name_or_path": "roberta-base", + "num_train_epochs": 3, + "output_dir": "outputs/prefix/roberta-base/superglue-record", + "overwrite_output_dir": true, + "per_device_eval_batch_size": 16, + "per_device_train_batch_size": 16, + "predict_with_generate": true, + "push_to_hub": true, + "save_steps": 200, + "save_strategy": "steps", + "save_total_limit": 1, + "seed": 42, + "task_name": "superglue-record", + "test_dataset_config_name": [ + "en" + ], + "test_dataset_name": "superglue-record", + "tokenizer_name": "roberta-base", + "unfrozen_modules": [ + "deltas", + "classifier" + ], + "warmup_steps": 0 +} \ No newline at end of file diff --git a/examples/examples_text-classification/configs/prefix_roberta-base/superglue-wic.json b/examples/examples_text-classification/configs/prefix_roberta-base/superglue-wic.json new file mode 100644 index 0000000..e5d9f12 --- /dev/null +++ b/examples/examples_text-classification/configs/prefix_roberta-base/superglue-wic.json 
@@ -0,0 +1,43 @@ +{ + "dataset_config_name": [ + "en" + ], + "delta_type": "prefix", + "do_eval": true, + "do_test": true, + "do_train": true, + "eval_dataset_config_name": [ + "en" + ], + "eval_dataset_name": "superglue-wic", + "eval_steps": 100, + "evaluation_strategy": "steps", + "greater_is_better": true, + "learning_rate": 0.0003, + "load_best_model_at_end": true, + "max_source_length": 256, + "metric_for_best_model": "eval_accuracy", + "model_name_or_path": "roberta-base", + "num_train_epochs": 20, + "output_dir": "outputs/prefix/roberta-base/superglue-wic", + "overwrite_output_dir": true, + "per_device_eval_batch_size": 32, + "per_device_train_batch_size": 32, + "predict_with_generate": true, + "push_to_hub": true, + "save_steps": 100, + "save_strategy": "steps", + "save_total_limit": 1, + "seed": 42, + "task_name": "superglue-wic", + "test_dataset_config_name": [ + "en" + ], + "test_dataset_name": "superglue-wic", + "tokenizer_name": "roberta-base", + "unfrozen_modules": [ + "deltas", + "classifier" + ], + "warmup_steps": 0 +} \ No newline at end of file diff --git a/examples/examples_text-classification/configs/prefix_roberta-base/superglue-wsc.fixed.json b/examples/examples_text-classification/configs/prefix_roberta-base/superglue-wsc.fixed.json new file mode 100644 index 0000000..ee224df --- /dev/null +++ b/examples/examples_text-classification/configs/prefix_roberta-base/superglue-wsc.fixed.json @@ -0,0 +1,43 @@ +{ + "dataset_config_name": [ + "en" + ], + "delta_type": "prefix", + "do_eval": true, + "do_test": true, + "do_train": true, + "eval_dataset_config_name": [ + "en" + ], + "eval_dataset_name": "superglue-wsc.fixed", + "eval_steps": 100, + "evaluation_strategy": "steps", + "greater_is_better": true, + "learning_rate": 0.0003, + "load_best_model_at_end": true, + "max_source_length": 256, + "metric_for_best_model": "eval_accuracy", + "model_name_or_path": "roberta-base", + "num_train_epochs": 20, + "output_dir": "outputs/prefix/roberta-base/superglue-wsc.fixed", + "overwrite_output_dir": true, + "per_device_eval_batch_size": 32, + "per_device_train_batch_size": 32, + "predict_with_generate": true, + "push_to_hub": true, + "save_steps": 100, + "save_strategy": "steps", + "save_total_limit": 1, + "seed": 42, + "task_name": "superglue-wsc.fixed", + "test_dataset_config_name": [ + "en" + ], + "test_dataset_name": "superglue-wsc.fixed", + "tokenizer_name": "roberta-base", + "unfrozen_modules": [ + "deltas", + "classifier" + ], + "warmup_steps": 0 +} \ No newline at end of file diff --git a/examples/examples_text-classification/metrics/glue.py b/examples/examples_text-classification/metrics/glue.py new file mode 100644 index 0000000..ffd0fc1 --- /dev/null +++ b/examples/examples_text-classification/metrics/glue.py @@ -0,0 +1,156 @@ +# coding=utf-8 +# Copyright 2020 The HuggingFace Datasets Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" GLUE benchmark metric. 
""" + +from scipy.stats import pearsonr, spearmanr +from sklearn.metrics import f1_score, matthews_corrcoef + +import datasets + + +_CITATION = """\ +@inproceedings{wang2019glue, + title={{GLUE}: A Multi-Task Benchmark and Analysis Platform for Natural Language Understanding}, + author={Wang, Alex and Singh, Amanpreet and Michael, Julian and Hill, Felix and Levy, Omer and Bowman, Samuel R.}, + note={In the Proceedings of ICLR.}, + year={2019} +} +""" + +_DESCRIPTION = """\ +GLUE, the General Language Understanding Evaluation benchmark +(https://gluebenchmark.com/) is a collection of resources for training, +evaluating, and analyzing natural language understanding systems. +""" + +_KWARGS_DESCRIPTION = """ +Compute GLUE evaluation metric associated to each GLUE dataset. +Args: + predictions: list of predictions to score. + Each translation should be tokenized into a list of tokens. + references: list of lists of references for each translation. + Each reference should be tokenized into a list of tokens. +Returns: depending on the GLUE subset, one or several of: + "accuracy": Accuracy + "f1": F1 score + "pearson": Pearson Correlation + "spearmanr": Spearman Correlation + "matthews_correlation": Matthew Correlation +Examples: + + >>> glue_metric = datasets.load_metric('glue', 'sst2') # 'sst2' or any of ["mnli", "mnli_mismatched", "mnli_matched", "qnli", "rte", "wnli", "hans"] + >>> references = [0, 1] + >>> predictions = [0, 1] + >>> results = glue_metric.compute(predictions=predictions, references=references) + >>> print(results) + {'accuracy': 1.0} + + >>> glue_metric = datasets.load_metric('glue', 'mrpc') # 'mrpc' or 'qqp' + >>> references = [0, 1] + >>> predictions = [0, 1] + >>> results = glue_metric.compute(predictions=predictions, references=references) + >>> print(results) + {'accuracy': 1.0, 'f1': 1.0} + + >>> glue_metric = datasets.load_metric('glue', 'stsb') + >>> references = [0., 1., 2., 3., 4., 5.] + >>> predictions = [0., 1., 2., 3., 4., 5.] 
+ >>> results = glue_metric.compute(predictions=predictions, references=references) + >>> print({"pearson": round(results["pearson"], 2), "spearmanr": round(results["spearmanr"], 2)}) + {'pearson': 1.0, 'spearmanr': 1.0} + + >>> glue_metric = datasets.load_metric('glue', 'cola') + >>> references = [0, 1] + >>> predictions = [0, 1] + >>> results = glue_metric.compute(predictions=predictions, references=references) + >>> print(results) + {'matthews_correlation': 1.0} +""" + + +def simple_accuracy(preds, labels): + return float((preds == labels).mean()) + + +def acc_and_f1(preds, labels): + acc = simple_accuracy(preds, labels) + f1 = float(f1_score(y_true=labels, y_pred=preds)) + return { + "accuracy": acc, + "f1": f1, + } + + +def pearson_and_spearman(preds, labels): + pearson_corr = float(pearsonr(preds, labels)[0]) + spearman_corr = float(spearmanr(preds, labels)[0]) + return { + "pearson": pearson_corr, + "spearmanr": spearman_corr, + } + + +@datasets.utils.file_utils.add_start_docstrings(_DESCRIPTION, _KWARGS_DESCRIPTION) +class Glue(datasets.Metric): + def _info(self): + if self.config_name not in [ + "sst2", + "mnli", + "mnli_mismatched", + "mnli_matched", + "cola", + "stsb", + "mrpc", + "qqp", + "qnli", + "rte", + "wnli", + "hans", + ]: + raise KeyError( + "You should supply a configuration name selected in " + '["sst2", "mnli", "mnli_mismatched", "mnli_matched", ' + '"cola", "stsb", "mrpc", "qqp", "qnli", "rte", "wnli", "hans"]' + ) + return datasets.MetricInfo( + description=_DESCRIPTION, + citation=_CITATION, + inputs_description=_KWARGS_DESCRIPTION, + features=datasets.Features( + { + "predictions": datasets.Value("int64" if self.config_name != "stsb" else "float32"), + "references": datasets.Value("int64" if self.config_name != "stsb" else "float32"), + } + ), + codebase_urls=[], + reference_urls=[], + format="numpy", + ) + + def _compute(self, predictions, references): + if self.config_name == "cola": + return {"matthews_correlation": matthews_corrcoef(references, predictions)} + elif self.config_name == "stsb": + return pearson_and_spearman(predictions, references) + elif self.config_name in ["mrpc", "qqp"]: + return acc_and_f1(predictions, references) + elif self.config_name in ["sst2", "mnli", "mnli_mismatched", "mnli_matched", "qnli", "rte", "wnli", "hans"]: + return {"accuracy": simple_accuracy(predictions, references)} + else: + raise KeyError( + "You should supply a configuration name selected in " + '["sst2", "mnli", "mnli_mismatched", "mnli_matched", ' + '"cola", "stsb", "mrpc", "qqp", "qnli", "rte", "wnli", "hans"]' + ) \ No newline at end of file diff --git a/examples/examples_text-classification/requirements.txt b/examples/examples_text-classification/requirements.txt new file mode 100644 index 0000000..8d8ff7a --- /dev/null +++ b/examples/examples_text-classification/requirements.txt @@ -0,0 +1,8 @@ +accelerate +datasets >= 1.8.0 +sentencepiece != 0.1.92 +scipy +scikit-learn +protobuf +torch >= 1.3 +argunparse \ No newline at end of file diff --git a/examples/examples_text-classification/run.sh b/examples/examples_text-classification/run.sh new file mode 100644 index 0000000..e7363d6 --- /dev/null +++ b/examples/examples_text-classification/run.sh @@ -0,0 +1,7 @@ +files=(cola mnli mrpc qnli qqp rte sst2 stsb superglue-boolq superglue-cb superglue-copa superglue-multirc superglue-record superglue-wic superglue-wsc.fixed) +for ((i=$1; i<=$2; i++)) +do + dataset=${files[i]} + echo "id$i:$dataset" + TOKENIZERS_PARALLELISM=false python run_glue.py 
configs/$3/$dataset.json +done \ No newline at end of file diff --git a/examples/examples_text-classification/run_glue.py b/examples/examples_text-classification/run_glue.py new file mode 100755 index 0000000..9ca0477 --- /dev/null +++ b/examples/examples_text-classification/run_glue.py @@ -0,0 +1,632 @@ +#!/usr/bin/env python +# coding=utf-8 +# Copyright 2020 The HuggingFace Inc. team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" Finetuning the library models for sequence classification on GLUE.""" +# You can also adapt this script on your own text classification task. Pointers for this are left as comments. + +import argparse +import dataclasses +import json +import logging +import os +from pathlib import Path +import random +import re +import sys +from dataclasses import dataclass, field +from typing import Optional + +import datasets +import numpy as np +from datasets import load_dataset, load_metric +from opendelta.utils.delta_hub import create_hub_repo_name + +import transformers +from transformers import ( + AutoConfig, + AutoModelForSequenceClassification, + AutoTokenizer, + DataCollatorWithPadding, + EvalPrediction, + HfArgumentParser, + PretrainedConfig, + # Trainer, + TrainingArguments, + default_data_collator, + set_seed, +) +from transformers.trainer import Trainer + +from transformers.trainer_utils import get_last_checkpoint +from transformers.utils import check_min_version +from transformers.utils.versions import require_version + + +# Will error if the minimal version of Transformers is not installed. Remove at your own risks. +# check_min_version("4.16.0.dev0") + +require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/text-classification/requirements.txt") + +task_to_keys = { + "cola": ("sentence", None), + "mnli": ("premise", "hypothesis"), + "mrpc": ("sentence1", "sentence2"), + "qnli": ("question", "sentence"), + "qqp": ("question1", "question2"), + "rte": ("sentence1", "sentence2"), + "sst2": ("sentence", None), + "stsb": ("sentence1", "sentence2"), + "wnli": ("sentence1", "sentence2"), +} + +logger = logging.getLogger(__name__) + + +@dataclass +class DataTrainingArguments: + """ + Arguments pertaining to what data we are going to input our model for training and eval. + + Using `HfArgumentParser` we can turn this class + into argparse arguments to be able to specify them on + the command line. + """ + + task_name: Optional[str] = field( + default=None, + metadata={"help": "The name of the task to train on: " + ", ".join(task_to_keys.keys())}, + ) + dataset_name: Optional[str] = field( + default=None, metadata={"help": "The name of the dataset to use (via the datasets library)."} + ) + dataset_config_name: Optional[str] = field( + default=None, metadata={"help": "The configuration name of the dataset to use (via the datasets library)."} + ) + max_seq_length: int = field( + default=128, + metadata={ + "help": "The maximum total input sequence length after tokenization. 
Sequences longer " + "than this will be truncated, sequences shorter will be padded." + }, + ) + overwrite_cache: bool = field( + default=False, metadata={"help": "Overwrite the cached preprocessed datasets or not."} + ) + pad_to_max_length: bool = field( + default=True, + metadata={ + "help": "Whether to pad all samples to `max_seq_length`. " + "If False, will pad the samples dynamically when batching to the maximum length in the batch." + }, + ) + max_train_samples: Optional[int] = field( + default=None, + metadata={ + "help": "For debugging purposes or quicker training, truncate the number of training examples to this " + "value if set." + }, + ) + max_eval_samples: Optional[int] = field( + default=None, + metadata={ + "help": "For debugging purposes or quicker training, truncate the number of evaluation examples to this " + "value if set." + }, + ) + max_predict_samples: Optional[int] = field( + default=None, + metadata={ + "help": "For debugging purposes or quicker training, truncate the number of prediction examples to this " + "value if set." + }, + ) + train_file: Optional[str] = field( + default=None, metadata={"help": "A csv or a json file containing the training data."} + ) + validation_file: Optional[str] = field( + default=None, metadata={"help": "A csv or a json file containing the validation data."} + ) + test_file: Optional[str] = field(default=None, metadata={"help": "A csv or a json file containing the test data."}) + + def __post_init__(self): + if self.task_name is not None: + self.task_name = self.task_name.lower() + if self.task_name not in task_to_keys.keys(): + raise ValueError("Unknown task, you should pick one in " + ",".join(task_to_keys.keys())) + elif self.dataset_name is not None: + pass + elif self.train_file is None or self.validation_file is None: + raise ValueError("Need either a GLUE task, a training/validation file or a dataset name.") + else: + train_extension = self.train_file.split(".")[-1] + assert train_extension in ["csv", "json"], "`train_file` should be a csv or a json file." + validation_extension = self.validation_file.split(".")[-1] + assert ( + validation_extension == train_extension + ), "`validation_file` should have the same extension (csv or json) as `train_file`." + + +@dataclass +class ModelArguments: + """ + Arguments pertaining to which model/config/tokenizer we are going to fine-tune from. + """ + + model_name_or_path: str = field( + metadata={"help": "Path to pretrained model or model identifier from huggingface.co/models"} + ) + config_name: Optional[str] = field( + default=None, metadata={"help": "Pretrained config name or path if not the same as model_name"} + ) + tokenizer_name: Optional[str] = field( + default=None, metadata={"help": "Pretrained tokenizer name or path if not the same as model_name"} + ) + cache_dir: Optional[str] = field( + default=None, + metadata={"help": "Where do you want to store the pretrained models downloaded from huggingface.co"}, + ) + use_fast_tokenizer: bool = field( + default=True, + metadata={"help": "Whether to use one of the fast tokenizer (backed by the tokenizers library) or not."}, + ) + model_revision: str = field( + default="main", + metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."}, + ) + use_auth_token: bool = field( + default=False, + metadata={ + "help": "Will use the token generated when running `transformers-cli login` (necessary to use this script " + "with private models)." 
+ }, + ) + + +class RemainArgHfArgumentParser(HfArgumentParser): + def parse_json_file(self, json_file: str, return_remaining_args=True ): + """ + Alternative helper method that does not use `argparse` at all, instead loading a json file and populating the + dataclass types. + """ + data = json.loads(Path(json_file).read_text()) + outputs = [] + for dtype in self.dataclass_types: + keys = {f.name for f in dataclasses.fields(dtype) if f.init} + inputs = {k: data.pop(k) for k in list(data.keys()) if k in keys} + obj = dtype(**inputs) + outputs.append(obj) + + remain_args = argparse.ArgumentParser() + remain_args.__dict__.update(data) + if return_remaining_args: + return (*outputs, remain_args) + else: + return (*outputs,) + +def main(): + # See all possible arguments in src/transformers/training_args.py + # or by passing the --help flag to this script. + # We now keep distinct sets of args, for a cleaner separation of concerns. + + parser = RemainArgHfArgumentParser((ModelArguments, DataTrainingArguments, TrainingArguments)) + if len(sys.argv) == 2 and sys.argv[1].endswith(".json"): + # If we pass only one argument to the script and it's the path to a json file, + # let's parse it to get our arguments. + json_file=os.path.abspath(sys.argv[1]) + model_args, data_args, training_args, delta_args = parser.parse_json_file(json_file, return_remaining_args=True) #args = arg_string, return_remaining_strings=True) #parse_json_file(json_file=os.path.abspath(sys.argv[1])) + else: + model_args, data_args, training_args, delta_args = parser.parse_args_into_dataclasses() + + # Setup logging + logging.basicConfig( + format="%(asctime)s - %(levelname)s - %(name)s - %(message)s", + datefmt="%m/%d/%Y %H:%M:%S", + handlers=[logging.StreamHandler(sys.stdout)], + ) + + log_level = training_args.get_process_log_level() + logger.setLevel(log_level) + datasets.utils.logging.set_verbosity(log_level) + transformers.utils.logging.set_verbosity(log_level) + transformers.utils.logging.enable_default_handler() + transformers.utils.logging.enable_explicit_format() + + # Log on each process the small summary: + logger.warning( + f"Process rank: {training_args.local_rank}, device: {training_args.device}, n_gpu: {training_args.n_gpu}" + + f"distributed training: {bool(training_args.local_rank != -1)}, 16-bits training: {training_args.fp16}" + ) + logger.info(f"Training/evaluation parameters {training_args}") + + # Detecting last checkpoint. + last_checkpoint = None + if os.path.isdir(training_args.output_dir) and training_args.do_train and not training_args.overwrite_output_dir: + last_checkpoint = get_last_checkpoint(training_args.output_dir) + if last_checkpoint is None and len(os.listdir(training_args.output_dir)) > 0: + raise ValueError( + f"Output directory ({training_args.output_dir}) already exists and is not empty. " + "Use --overwrite_output_dir to overcome." + ) + elif last_checkpoint is not None and training_args.resume_from_checkpoint is None: + logger.info( + f"Checkpoint detected, resuming training at {last_checkpoint}. To avoid this behavior, change " + "the `--output_dir` or add `--overwrite_output_dir` to train from scratch." + ) + + # Set seed before initializing model. + set_seed(training_args.seed) + + # Get the datasets: you can either provide your own CSV/JSON training and evaluation files (see below) + # or specify a GLUE benchmark task (the dataset will be downloaded automatically from the datasets Hub). 
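+    # As a rough illustration (a hypothetical invocation, using one of the prefix-tuning configs added in this
+    # commit), the script can be launched with a single JSON config file, mirroring run.sh:
+    #   python run_glue.py configs/prefix_roberta-base/sst2.json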
+ # + # For CSV/JSON files, this script will use as labels the column called 'label' and as pair of sentences the + # sentences in columns called 'sentence1' and 'sentence2' if such column exists or the first two columns not named + # label if at least two columns are provided. + # + # If the CSVs/JSONs contain only one non-label column, the script does single sentence classification on this + # single column. You can easily tweak this behavior (see below) + # + # In distributed training, the load_dataset function guarantee that only one local process can concurrently + # download the dataset. + if data_args.task_name is not None: + # Downloading and loading a dataset from the hub. + + raw_datasets = load_dataset("glue", data_args.task_name, cache_dir=model_args.cache_dir) + # if you encounter error here + # download the dataset, save to disk and then load_from_disk + # from datasets import load_from_disk + # raw_datasets = load_from_disk(f"../../../../huggingface_datasets/saved_to_disk/glue.{data_args.task_name}") + + elif data_args.dataset_name is not None: + # Downloading and loading a dataset from the hub. + raw_datasets = load_dataset( + data_args.dataset_name, data_args.dataset_config_name, cache_dir=model_args.cache_dir + ) + else: + # Loading a dataset from your local files. + # CSV/JSON training and evaluation files are needed. + data_files = {"train": data_args.train_file, "validation": data_args.validation_file} + + # Get the test dataset: you can provide your own CSV/JSON test file (see below) + # when you use `do_predict` without specifying a GLUE benchmark task. + if training_args.do_predict: + if data_args.test_file is not None: + train_extension = data_args.train_file.split(".")[-1] + test_extension = data_args.test_file.split(".")[-1] + assert ( + test_extension == train_extension + ), "`test_file` should have the same extension (csv or json) as `train_file`." + data_files["test"] = data_args.test_file + else: + raise ValueError("Need either a GLUE task or a test file for `do_predict`.") + + for key in data_files.keys(): + logger.info(f"load a local file for {key}: {data_files[key]}") + + if data_args.train_file.endswith(".csv"): + # Loading a dataset from local csv files + raw_datasets = load_dataset("csv", data_files=data_files, cache_dir=model_args.cache_dir) + else: + # Loading a dataset from local json files + raw_datasets = load_dataset("json", data_files=data_files, cache_dir=model_args.cache_dir) + # See more about loading any type of standard or custom dataset at + # https://huggingface.co/docs/datasets/loading_datasets.html. + + # Labels + if data_args.task_name is not None: + is_regression = data_args.task_name == "stsb" + if not is_regression: + label_list = raw_datasets["train"].features["label"].names + num_labels = len(label_list) + else: + num_labels = 1 + else: + # Trying to have good defaults here, don't hesitate to tweak to your needs. + is_regression = raw_datasets["train"].features["label"].dtype in ["float32", "float64"] + if is_regression: + num_labels = 1 + else: + # A useful fast method: + # https://huggingface.co/docs/datasets/package_reference/main_classes.html#datasets.Dataset.unique + label_list = raw_datasets["train"].unique("label") + label_list.sort() # Let's sort it for determinism + num_labels = len(label_list) + + # Load pretrained model and tokenizer + # + # In distributed training, the .from_pretrained methods guarantee that only one local process can concurrently + # download model & vocab. 
+ config = AutoConfig.from_pretrained( + model_args.config_name if model_args.config_name else model_args.model_name_or_path, + num_labels=num_labels, + finetuning_task=data_args.task_name, + cache_dir=model_args.cache_dir, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + ) + tokenizer = AutoTokenizer.from_pretrained( + model_args.tokenizer_name if model_args.tokenizer_name else model_args.model_name_or_path, + cache_dir=model_args.cache_dir, + use_fast=model_args.use_fast_tokenizer, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + ) + model = AutoModelForSequenceClassification.from_pretrained( + model_args.model_name_or_path, + from_tf=bool(".ckpt" in model_args.model_name_or_path), + config=config, + cache_dir=model_args.cache_dir, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + ) + + if delta_args.delta_type.lower() != "none": + from opendelta import AutoDeltaConfig + from opendelta.auto_delta import AutoDeltaModel + delta_config = AutoDeltaConfig.from_dict(vars(delta_args)) + delta_model = AutoDeltaModel.from_config(delta_config, backbone_model=model) + delta_model.freeze_module(set_state_dict = True) + delta_model.log(delta_ratio=True, trainable_ratio=True, visualization=True) + + + + + + + + + + + # Preprocessing the raw_datasets + if data_args.task_name is not None: + sentence1_key, sentence2_key = task_to_keys[data_args.task_name] + else: + # Again, we try to have some nice defaults but don't hesitate to tweak to your use case. + non_label_column_names = [name for name in raw_datasets["train"].column_names if name != "label"] + if "sentence1" in non_label_column_names and "sentence2" in non_label_column_names: + sentence1_key, sentence2_key = "sentence1", "sentence2" + else: + if len(non_label_column_names) >= 2: + sentence1_key, sentence2_key = non_label_column_names[:2] + else: + sentence1_key, sentence2_key = non_label_column_names[0], None + + # Padding strategy + if data_args.pad_to_max_length: + padding = "max_length" + else: + # We will pad later, dynamically at batch creation, to the max sequence length in each batch + padding = False + + # Some models have set the order of the labels to use, so let's make sure we do use it. + label_to_id = None + if ( + model.config.label2id != PretrainedConfig(num_labels=num_labels).label2id + and data_args.task_name is not None + and not is_regression + ): + # Some have all caps in their config, some don't. + label_name_to_id = {k.lower(): v for k, v in model.config.label2id.items()} + if list(sorted(label_name_to_id.keys())) == list(sorted(label_list)): + label_to_id = {i: int(label_name_to_id[label_list[i]]) for i in range(num_labels)} + else: + logger.warning( + "Your model seems to have been trained with labels, but they don't match the dataset: ", + f"model labels: {list(sorted(label_name_to_id.keys()))}, dataset labels: {list(sorted(label_list))}." 
+ "\nIgnoring the model labels as a result.", + ) + elif data_args.task_name is None and not is_regression: + label_to_id = {v: i for i, v in enumerate(label_list)} + + if label_to_id is not None: + model.config.label2id = label_to_id + model.config.id2label = {id: label for label, id in config.label2id.items()} + elif data_args.task_name is not None and not is_regression: + model.config.label2id = {l: i for i, l in enumerate(label_list)} + model.config.id2label = {id: label for label, id in config.label2id.items()} + + if data_args.max_seq_length > tokenizer.model_max_length: + logger.warning( + f"The max_seq_length passed ({data_args.max_seq_length}) is larger than the maximum length for the" + f"model ({tokenizer.model_max_length}). Using max_seq_length={tokenizer.model_max_length}." + ) + max_seq_length = min(data_args.max_seq_length, tokenizer.model_max_length) + + def preprocess_function(examples): + # Tokenize the texts + args = ( + (examples[sentence1_key],) if sentence2_key is None else (examples[sentence1_key], examples[sentence2_key]) + ) + result = tokenizer(*args, padding=padding, max_length=max_seq_length, truncation=True) + + # Map labels to IDs (not necessary for GLUE tasks) + if label_to_id is not None and "label" in examples: + result["label"] = [(label_to_id[l] if l != -1 else -1) for l in examples["label"]] + return result + + with training_args.main_process_first(desc="dataset map pre-processing"): + raw_datasets = raw_datasets.map( + preprocess_function, + batched=True, + load_from_cache_file=not data_args.overwrite_cache, + desc="Running tokenizer on dataset", + ) + if training_args.do_train: + if "train" not in raw_datasets: + raise ValueError("--do_train requires a train dataset") + train_dataset = raw_datasets["train"] + if data_args.max_train_samples is not None: + train_dataset = train_dataset.select(range(data_args.max_train_samples)) + + if training_args.do_eval: + if "validation" not in raw_datasets and "validation_matched" not in raw_datasets: + raise ValueError("--do_eval requires a validation dataset") + eval_dataset = raw_datasets["validation_matched" if data_args.task_name == "mnli" else "validation"] + if data_args.max_eval_samples is not None: + eval_dataset = eval_dataset.select(range(data_args.max_eval_samples)) + + if training_args.do_predict or data_args.task_name is not None or data_args.test_file is not None: + if "test" not in raw_datasets and "test_matched" not in raw_datasets: + raise ValueError("--do_predict requires a test dataset") + predict_dataset = raw_datasets["test_matched" if data_args.task_name == "mnli" else "test"] + if data_args.max_predict_samples is not None: + predict_dataset = predict_dataset.select(range(data_args.max_predict_samples)) + + # Log a few random samples from the training set: + if training_args.do_train: + for index in random.sample(range(len(train_dataset)), 3): + logger.info(f"Sample {index} of the training set: {train_dataset[index]}.") + + # Get the metric function + if data_args.task_name is not None: + # metric = load_metric("glue", data_args.task_name) + metric = load_metric("./metrics/glue.py", data_args.task_name) + else: + metric = load_metric("accuracy") + + # You can define your custom compute_metrics function. It takes an `EvalPrediction` object (a namedtuple with a + # predictions and label_ids field) and has to return a dictionary string to float. 
+ def compute_metrics(p: EvalPrediction): + preds = p.predictions[0] if isinstance(p.predictions, tuple) else p.predictions + preds = np.squeeze(preds) if is_regression else np.argmax(preds, axis=1) + if data_args.task_name is not None: + result = metric.compute(predictions=preds, references=p.label_ids) + if len(result) > 1: + result["combined_score"] = np.mean(list(result.values())).item() + return result + elif is_regression: + return {"mse": ((preds - p.label_ids) ** 2).mean().item()} + else: + return {"accuracy": (preds == p.label_ids).astype(np.float32).mean().item()} + + # Data collator will default to DataCollatorWithPadding, so we change it if we already did the padding. + if data_args.pad_to_max_length: + data_collator = default_data_collator + elif training_args.fp16: + data_collator = DataCollatorWithPadding(tokenizer, pad_to_multiple_of=8) + else: + data_collator = None + + # Initialize our Trainer + + trainer = Trainer( + model=model, + args=training_args, + train_dataset=train_dataset if training_args.do_train else None, + eval_dataset=eval_dataset if training_args.do_eval else None, + compute_metrics=compute_metrics, + tokenizer=tokenizer, + data_collator=data_collator, + ) + + # Training + if training_args.do_train: + checkpoint = None + if training_args.resume_from_checkpoint is not None: + checkpoint = training_args.resume_from_checkpoint + elif last_checkpoint is not None: + checkpoint = last_checkpoint + train_result = trainer.train(resume_from_checkpoint=checkpoint) + metrics = train_result.metrics + max_train_samples = ( + data_args.max_train_samples if data_args.max_train_samples is not None else len(train_dataset) + ) + metrics["train_samples"] = min(max_train_samples, len(train_dataset)) + + trainer.save_model() # Saves the tokenizer too for easy upload + + trainer.log_metrics("train", metrics) + trainer.save_metrics("train", metrics) + trainer.save_state() + + results = {} + # Evaluation + if training_args.do_eval: + logger.info("*** Evaluate ***") + + # Loop to handle MNLI double evaluation (matched, mis-matched) + tasks = [data_args.task_name] + eval_datasets = [eval_dataset] + if data_args.task_name == "mnli": + tasks.append("mnli-mm") + eval_datasets.append(raw_datasets["validation_mismatched"]) + + for eval_dataset, task in zip(eval_datasets, tasks): + metrics = trainer.evaluate(eval_dataset=eval_dataset) + + max_eval_samples = ( + data_args.max_eval_samples if data_args.max_eval_samples is not None else len(eval_dataset) + ) + metrics["eval_samples"] = min(max_eval_samples, len(eval_dataset)) + + trainer.log_metrics("eval", metrics) + trainer.save_metrics("eval", metrics) + results['eval'] = metrics + + if training_args.do_predict: + logger.info("*** Predict ***") + + # Loop to handle MNLI double evaluation (matched, mis-matched) + tasks = [data_args.task_name] + predict_datasets = [predict_dataset] + if data_args.task_name == "mnli": + tasks.append("mnli-mm") + predict_datasets.append(raw_datasets["test_mismatched"]) + + for predict_dataset, task in zip(predict_datasets, tasks): + # Removing the `label` columns because it contains -1 and Trainer won't like that. 
+ predict_dataset = predict_dataset.remove_columns("label") + predictions = trainer.predict(predict_dataset, metric_key_prefix="predict").predictions + predictions = np.squeeze(predictions) if is_regression else np.argmax(predictions, axis=1) + + output_predict_file = os.path.join(training_args.output_dir, f"predict_results_{task}.txt") + if trainer.is_world_process_zero(): + with open(output_predict_file, "w") as writer: + logger.info(f"***** Predict results {task} *****") + writer.write("index\tprediction\n") + for index, item in enumerate(predictions): + if is_regression: + writer.write(f"{index}\t{item:3.3f}\n") + else: + item = label_list[item] + writer.write(f"{index}\t{item}\n") + + + # kwargs = {"finetuned_from": model_args.model_name_or_path, "tasks": "text-classification"} + # if data_args.task_name is not None: + # kwargs["language"] = "en" + # kwargs["dataset_tags"] = "glue" + # kwargs["dataset_args"] = data_args.task_name + # kwargs["dataset"] = f"GLUE {data_args.task_name.upper()}" + # kwargs["delta_type"] = delta_args.delta_type + + repo_name = create_hub_repo_name(root="DeltaHub", + dataset=data_args.task_name, + delta_type = delta_args.delta_type, + model_name_or_path= model_args.model_name_or_path) + + if training_args.push_to_hub: # TODO add description here + delta_model.save_finetuned(push_to_hub=True, save_directory=repo_name, use_auth_token=True) + # trainer.push_to_hub(**kwargs) + else: + delta_model.save_finetuned(push_to_hub=False, save_directory=repo_name, use_auth_token=True) + +def _mp_fn(index): + # For xla_spawn (TPUs) + main() + + +if __name__ == "__main__": + main() diff --git a/examples/examples_text-classification/util.py b/examples/examples_text-classification/util.py new file mode 100644 index 0000000..50393e0 --- /dev/null +++ b/examples/examples_text-classification/util.py @@ -0,0 +1,75 @@ +from datasets import load_dataset, load_metric +import torch +import logging + + +logger = logging.getLogger(__name__) + + +class DataLoader: + small_datasets_without_all_splits = ["cola", "wnli", "rte", "superglue-cb", "superglue-copa", "superglue-multirc", + "superglue-wic", "superglue-wsc.fixed", "superglue-rte", "mrpc", "stsb", + "superglue-boolq"] + large_data_without_all_splits = ["qqp", "qnli", "superglue-record", "sst2"] + + def __init__(self, raw_datasets, data_args, model_args, training_args): + self.raw_datasets = raw_datasets + self.data_args = data_args + self.model_args = model_args + self.training_args = training_args + + def shuffled_indices(self, dataset): + num_samples = len(dataset) + generator = torch.Generator() + generator.manual_seed(self.training_args.seed) + return torch.randperm(num_samples, generator=generator).tolist() + + def subsample(self, dataset, indices=None): + """ + Given a dataset returns the subsampled dataset. + :param n_obs: the number of samples of the subsampled dataset. + :param indices: indices to select the samples from, if not given, indices are computed + from by shuffling the given dataset. + :return: subsampled dataset. 
+ """ + if indices is None: + indices = self.shuffled_indices(dataset) + return dataset.select(indices) + + def get_split_indices(self, split, dataset, validation_size): + indices = self.shuffled_indices(dataset) + if split == "validation": + return indices[:validation_size] + else: + return indices[validation_size:] + + def get(self, split): + if self.data_args.task_name == 'mnli': + if split == 'validation': + split = 'validation_mismatched' + elif split == 'test': + split = 'validation_matched' + return self.raw_datasets[split] + # For small datasets (n_samples < 10K) without test set, we divide validation set to + # half, use one half as test set and one half as validation set. + if self.data_args.task_name in self.small_datasets_without_all_splits \ + and split != "train": + logger.info("Split validation set into test and validation set.") + dataset = self.raw_datasets['validation'] + indices = self.get_split_indices(split, dataset, validation_size=len(dataset)//2) + dataset = self.subsample(dataset, indices) + # For larger datasets (n_samples > 10K), we divide training set into 1K as + # validation and the rest as training set, keeping the original validation + # set as the test set. + elif self.data_args.task_name in self.large_data_without_all_splits \ + and split != "test": + logger.info("Split training set into train and validation set, use validation set as test set.") + dataset = self.raw_datasets['train'] + indices = self.get_split_indices(split, dataset, validation_size=1000) + dataset = self.subsample(dataset, indices) + elif split == 'train': + dataset = self.raw_datasets[split] + else: + assert split == 'test', print("expected test, but got {}".format(split)) + dataset = self.raw_datasets[split] + return dataset \ No newline at end of file diff --git a/examples/setup_seq2seq.py b/examples/setup_seq2seq.py new file mode 100755 index 0000000..0aa5a64 --- /dev/null +++ b/examples/setup_seq2seq.py @@ -0,0 +1,48 @@ +"""Install Compacter.""" +import os +import setuptools +from torch.utils.cpp_extension import BuildExtension, CUDAExtension + +#os.environ['TORCH_CUDA_ARCH_LIST']="3.5;3.7;6.1;7.0;7.5;8.6+PTX" + +def setup_package(): + long_description = "examples_seq2seq" + setuptools.setup( + name='examples_seq2seq', + version='0.0.1', + description='seq2seq example', + long_description=long_description, + long_description_content_type='text/markdown', + author='Shengding Hu', + license='MIT License', + packages=setuptools.find_packages( + exclude=['docs', 'tests', 'scripts']), + dependency_links=[ + 'https://download.pytorch.org/whl/torch_stable.html', + ], + classifiers=[ + 'Intended Audience :: Developers', + 'Intended Audience :: Science/Research', + 'License :: OSI Approved :: MIT License', + 'Topic :: Scientific/Engineering :: Artificial Intelligence', + 'Programming Language :: Python :: 3', + 'Programming Language :: Python :: 3.7.10', + ], + keywords='text nlp machinelearning', + # ext_modules=[ + # CUDAExtension('seq2seq.projections.fwh_cuda', + # sources=[ + # 'seq2seq/projections/fwh_cuda/fwh_cpp.cpp', + # 'seq2seq/projections/fwh_cuda/fwh_cu.cu', + # ] + # ) + # ] + # , + cmdclass={"build_ext": BuildExtension}, + install_requires=[ + ], + ) + + +if __name__ == '__main__': + setup_package() diff --git a/examples/tutorial/0_basic.py b/examples/tutorial/0_basic.py new file mode 100644 index 0000000..2cfd8dd --- /dev/null +++ b/examples/tutorial/0_basic.py @@ -0,0 +1,26 @@ +from transformers import AutoModelForSequenceClassification +model = 
AutoModelForSequenceClassification.from_pretrained("facebook/bart-base") +# suppose we load BART + +from opendelta import Visualization +print("before modify") +Visualization(model).structure_graph() +""" +The white part is the name of the module. +The green part is the module's type. +The blue part is the tunable parameters, i.e., the parameters that require grad computation. +The grey part is the frozen parameters, i.e., the parameters that do not require grad computation. +The red part is the structure that is repeated and thus folded. +The purple part is the delta parameters inserted into the backbone model. +""" + +from opendelta import LoraModel +delta_model = LoraModel(backbone_model=model, modified_modules=['fc2']) +print("after modify") +delta_model.log() +# This will visualize the backbone after modification and other information. + +delta_model.freeze_module(exclude=["deltas", "layernorm_embedding"], set_state_dict=True) +print("after freeze") +delta_model.log() +# The set_state_dict=True will tell the method to change the state_dict of the backbone_model to maintaining only the trainable parts. diff --git a/examples/tutorial/0_interactive.py b/examples/tutorial/0_interactive.py new file mode 100644 index 0000000..c164a13 --- /dev/null +++ b/examples/tutorial/0_interactive.py @@ -0,0 +1,11 @@ +from transformers import BertForMaskedLM +model = BertForMaskedLM.from_pretrained("bert-base-cased") +# suppose we load BERT + +from opendelta import LoraModel +delta_model = LoraModel(backbone_model=model, interactive_modify=True) +# This will visualize the backbone after modification and other information. + +delta_model.freeze_module(exclude=["deltas", "layernorm_embedding"], set_state_dict=True) +delta_model.log() + diff --git a/examples/tutorial/1_with_openprompt.py b/examples/tutorial/1_with_openprompt.py new file mode 100644 index 0000000..35c39f9 --- /dev/null +++ b/examples/tutorial/1_with_openprompt.py @@ -0,0 +1,156 @@ +""" +This tutorial is a copy of OpenPrompt's tutorial/1.1_mixed_template.py +The only modification is in lines 98 to 102 + +1. OpenPrompt provides pre-processing of data, such as prompt template formatting +2. OpenPrompt pre-process the model input, such as: prompt soft embedding +3. OpenDelta modify the backbone model, such as: Adapter, Lora, Compactor, etc. +4. OpenPrompt post-process the model output, such as: extract logits at position, apply prompt verbalizer +""" + +# load dataset +from datasets import load_dataset +from datasets import load_from_disk +# raw_dataset = load_dataset('super_glue', 'cb', cache_dir="../datasets/.cache/huggingface_datasets") +raw_dataset = load_from_disk("/home/hx/huggingface_datasets/saved_to_disk/super_glue.cb") +# Note that if you are running this scripts inside a GPU cluster, there are chances are you are not able to connect to huggingface website directly. +# In this case, we recommend you to run `raw_dataset = load_dataset(...)` on some machine that have internet connections. +# Then use `raw_dataset.save_to_disk(path)` method to save to local path. +# Thirdly upload the saved content into the machiine in cluster. +# Then use `load_from_disk` method to load the dataset. 
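+# A minimal sketch of that offline workflow (the path below is a placeholder, not a path from this repo):
+#   raw_dataset = load_dataset('super_glue', 'cb')            # on a machine with internet access
+#   raw_dataset.save_to_disk("/path/to/super_glue.cb")        # copy the saved directory to the cluster
+#   raw_dataset = load_from_disk("/path/to/super_glue.cb")    # inside the cluster job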
+ +from openprompt.data_utils import InputExample + +dataset = {} +for split in ['train', 'validation', 'test']: + dataset[split] = [] + for data in raw_dataset[split]: + input_example = InputExample(text_a = data['premise'], text_b = data['hypothesis'], label=int(data['label']), guid=data['idx']) + dataset[split].append(input_example) +print(dataset['train'][0]) + +# You can load the plm related things provided by openprompt simply by calling: +from openprompt.plms import load_plm +plm, tokenizer, model_config, WrapperClass = load_plm("t5", "t5-base") + +# Constructing Template +# A template can be constructed from the yaml config, but it can also be constructed by directly passing arguments. +from openprompt.prompts import MixedTemplate +template_text = '{"placeholder":"text_a"} {"soft"} {"soft"} {"soft"} {"placeholder":"text_b"}? {"soft"} {"soft"} {"soft"} {"mask"}.' +mytemplate = MixedTemplate(model=plm, tokenizer=tokenizer, text=template_text) + +# To better understand how does the template wrap the example, we visualize one instance. + +wrapped_example = mytemplate.wrap_one_example(dataset['train'][0]) +print(wrapped_example) + +# Now, the wrapped example is ready to be pass into the tokenizer, hence producing the input for language models. +# You can use the tokenizer to tokenize the input by yourself, but we recommend using our wrapped tokenizer, which is a wrapped tokenizer tailed for InputExample. +# The wrapper has been given if you use our `load_plm` function, otherwise, you should choose the suitable wrapper based on +# the configuration in `openprompt.plms.__init__.py`. +# Note that when t5 is used for classification, we only need to pass to decoder. +# The loss is calcaluted at . Thus passing decoder_max_length=3 saves the space +wrapped_t5tokenizer = WrapperClass(max_seq_length=128, decoder_max_length=3, tokenizer=tokenizer,truncate_method="head") +# or +from openprompt.plms import T5TokenizerWrapper +wrapped_t5tokenizer= T5TokenizerWrapper(max_seq_length=128, decoder_max_length=3, tokenizer=tokenizer,truncate_method="head") + +# You can see what a tokenized example looks like by +tokenized_example = wrapped_t5tokenizer.tokenize_one_example(wrapped_example, teacher_forcing=False) +print(tokenized_example) +print(tokenizer.convert_ids_to_tokens(tokenized_example['input_ids'])) +print(tokenizer.convert_ids_to_tokens(tokenized_example['decoder_input_ids'])) + +# Now it's time to convert the whole dataset into the input format! +# Simply loop over the dataset to achieve it! + +model_inputs = {} +for split in ['train', 'validation', 'test']: + model_inputs[split] = [] + for sample in dataset[split]: + tokenized_example = wrapped_t5tokenizer.tokenize_one_example(mytemplate.wrap_one_example(sample), teacher_forcing=False) + model_inputs[split].append(tokenized_example) + + +# We provide a `PromptDataLoader` class to help you do all the above matters and wrap them into an `torch.DataLoader` style iterator. +from openprompt import PromptDataLoader + +train_dataloader = PromptDataLoader(dataset=dataset["train"], template=mytemplate, tokenizer=tokenizer, + tokenizer_wrapper_class=WrapperClass, max_seq_length=256, decoder_max_length=3, + batch_size=4,shuffle=True, teacher_forcing=False, predict_eos_token=False, + truncate_method="head") + + +# Define the verbalizer +# In classification, you need to define your verbalizer, which is a mapping from logits on the vocabulary to the final label probability. 
Let's have a look at the verbalizer details: + +from openprompt.prompts import ManualVerbalizer +import torch + +# for example the verbalizer contains multiple label words in each class +myverbalizer = ManualVerbalizer(tokenizer, num_classes=3, label_words=[["yes"], ["no"], ["maybe"]]) + +print("label_words_ids", myverbalizer.label_words_ids) + +# Although you can manually combine the plm, template, verbalizer together, we provide a pipeline +# model which take the batched data from the PromptDataLoader and produce a class-wise logits + +from opendelta import LoraModel +# delta_model = LoraModel(backbone_model=plm, modified_modules=[]) +delta_model = LoraModel(backbone_model=plm, modified_modules=["SelfAttention.q", "SelfAttention.v"]) +delta_model.freeze_module(exclude=["deltas"], set_state_dict=True) +delta_model.log() + +from openprompt import PromptForClassification + +use_cuda = True +prompt_model = PromptForClassification(plm=plm, template=mytemplate, verbalizer=myverbalizer) +if use_cuda: + prompt_model = prompt_model.cuda() + +# Now the training is standard +from transformers import AdamW, get_linear_schedule_with_warmup +loss_func = torch.nn.CrossEntropyLoss() +no_decay = ['bias', 'LayerNorm.weight'] +# it's always good practice to set no decay to biase and LayerNorm parameters +optimizer_grouped_parameters = [ + {'params': [p for n, p in prompt_model.named_parameters() if not any(nd in n for nd in no_decay)], 'weight_decay': 0.01}, + {'params': [p for n, p in prompt_model.named_parameters() if any(nd in n for nd in no_decay)], 'weight_decay': 0.0} +] +print([n for n, p in prompt_model.named_parameters()]) + +optimizer = AdamW(optimizer_grouped_parameters, lr=1e-4) + +for epoch in range(30): + tot_loss = 0 + for step, inputs in enumerate(train_dataloader): + if use_cuda: + inputs = inputs.cuda() + logits = prompt_model(inputs) + labels = inputs['label'] + loss = loss_func(logits, labels) + loss.backward() + tot_loss += loss.item() + optimizer.step() + optimizer.zero_grad() + if step %100 ==1: + print("Epoch {}, average loss: {}".format(epoch, tot_loss/(step+1)), flush=True) + +# Evaluate +validation_dataloader = PromptDataLoader(dataset=dataset["validation"], template=mytemplate, tokenizer=tokenizer, + tokenizer_wrapper_class=WrapperClass, max_seq_length=256, decoder_max_length=3, + batch_size=4,shuffle=False, teacher_forcing=False, predict_eos_token=False, + truncate_method="head") + +allpreds = [] +alllabels = [] +for step, inputs in enumerate(validation_dataloader): + if use_cuda: + inputs = inputs.cuda() + logits = prompt_model(inputs) + labels = inputs['label'] + alllabels.extend(labels.cpu().tolist()) + allpreds.extend(torch.argmax(logits, dim=-1).cpu().tolist()) + +acc = sum([int(i==j) for i,j in zip(allpreds, alllabels)])/len(allpreds) +print(acc) \ No newline at end of file diff --git a/opendelta/__init__.py b/opendelta/__init__.py new file mode 100644 index 0000000..5e02fcb --- /dev/null +++ b/opendelta/__init__.py @@ -0,0 +1,17 @@ + +__version__ = "0.0.1" + +from .delta_configs import BaseDeltaConfig +from .utils import logging +from .utils.saving_loading_utils import SaveLoadMixin +from .basemodel import DeltaBase +from .auto_delta import AutoDeltaConfig, AutoDeltaModel +from .utils.structure_mapping import CommonStructureMap +from .delta_models.lora import LoraModel +from .delta_models.bitfit import BitFitModel +from .delta_models.compacter import CompacterModel +from .delta_models.adapter import AdapterModel +from .delta_models.prefix import PrefixModel +from 
.delta_models.soft_prompt import SoftPromptModel +from .delta_models.low_rank_adapter import LowRankAdapterModel +from .utils.visualization import Visualization \ No newline at end of file diff --git a/opendelta/auto_delta.py b/opendelta/auto_delta.py new file mode 100644 index 0000000..9680c36 --- /dev/null +++ b/opendelta/auto_delta.py @@ -0,0 +1,423 @@ +from copy import deepcopy +from typing import Any, Dict, OrderedDict +from opendelta.utils.visualization import Visualization +import torch.nn as nn +from transformers.file_utils import PushToHubMixin +from opendelta.utils.logging import get_logger +import importlib +from opendelta.delta_configs import BaseDeltaConfig + +logger = get_logger(__name__) + + +DELTA_CONFIG_MAPPING = { + "lora": "LoraConfig", + "low_rank_adapter": "LowRankAdapterConfig", + "bitfit": "BitFitConfig", + "adapter":"AdapterConfig", + "compacter":"CompacterConfig", + "prefix": "PrefixConfig", + "soft_prompt": "SoftPromptConfig", +} + +DELTA_MODEL_MAPPING = { + "lora": "LoraModel", + "low_rank_adapter": "LowRankAdapterModel", + "bitfit": "BitFitModel", + "adapter":"AdapterModel", + "compacter": "CompacterModel", + "prefix": "PrefixModel", + "soft_prompt": "SoftPromptModel", +} + +class _LazyConfigMapping(OrderedDict): + """ + A dictionary that lazily load its values when they are requested. + """ + + def __init__(self, mapping): + self._mapping = mapping + self._extra_content = {} + self._modules = {} + + def __getitem__(self, key): + if key in self._extra_content: + return self._extra_content[key] + if key not in self._mapping: + raise KeyError(key) + value = self._mapping[key] + module_name = key #model_type_to_module_name(key) + # if module_name not in self._modules: + self._modules[module_name] = importlib.import_module(f".{module_name}", "opendelta.delta_models") + return getattr(self._modules[module_name], value) + + def keys(self): + return list(self._mapping.keys()) + list(self._extra_content.keys()) + + def values(self): + return [self[k] for k in self._mapping.keys()] + list(self._extra_content.values()) + + def items(self): + return [(k, self[k]) for k in self._mapping.keys()] + list(self._extra_content.items()) + + def __iter__(self): + return iter(list(self._mapping.keys()) + list(self._extra_content.keys())) + + def __contains__(self, item): + return item in self._mapping or item in self._extra_content + + def register(self, key, value): + """ + Register a new configuration in this mapping. + """ + if key in self._mapping.keys(): + raise ValueError(f"'{key}' is already used by a Transformers config, pick another name.") + self._extra_content[key] = value + + +LAZY_CONFIG_MAPPING = _LazyConfigMapping(DELTA_CONFIG_MAPPING) + + + +class AutoDeltaConfig: + r""" + This is a generic configuration class that will be instantiated as one of the configuration classes of the library + when created with the :py:meth:`~AutoConfig.from_pretrained` class method. + This class cannot be instantiated directly using ``__init__()`` (throws an error). + """ + + def __init__(self): + raise EnvironmentError( + "AutoConfig is designed to be instantiated " + "using the ``AutoConfig.from_pretrained(pretrained_model_name_or_path)`` method." + ) + + @classmethod + def from_dict(cls, config_dict: Dict[str, Any], **kwargs): + r""" Instantiate a DeltaConfig according to the dict. Automatically load the config specified by + :obj:`delta_type`. + + Args: + config_dict (:obj:`dict`): The dict of configs of delta model. + kwargs: Other keyword argument pass to initialize the config. 
+ + >>> config = AutoDeltaConfig.from_dict({"delta_type":"lora"}) # This will load the dault lora config. + >>> config = AutoDeltaConfig.from_dict({"delta_type":"lora", "lora_r":5}) # Will load the default lora config, with lora_r = 5 + + """ + config_dict = deepcopy(config_dict) + delta_type = config_dict.pop("delta_type", None) + if delta_type is None: + raise RuntimeError("Do not specify a delta type, cannot load the default config") + config_class = LAZY_CONFIG_MAPPING[delta_type] + return config_class.from_dict(config_dict, **kwargs) + + + @classmethod + def from_finetuned(cls, finetuned_model_name_or_path, **kwargs): + r""" + Instantiate one of the configuration classes of the library from a finetuned delta model configuration. + The configuration class to instantiate is selected based on the ``delta_type`` property of the config object that + is loaded. + + Parameters: + + finetuned_model_name_or_path (:obj:`str` or :obj:`os.PathLike`, *optional*): + Can be either: + + - A string, the *model id* of a finetuned delta model configuration hosted inside a model repo on + huggingface.co. Valid model ids can be located at the root-level, like ``Davin/lora``, or + namespaced under a user or organization name, like ``DeltaHub/lora_t5-base_mrpc``. + - A path to a *directory* containing a configuration file saved using the + :py:meth:`DeltaBase.save_finetuned` method, + e.g., ``./my_model_directory/``. + - A path or url to a saved configuration JSON *file*, e.g., + ``./my_model_directory/configuration.json``. + The last two option are not tested but inherited from huggingface. + cache_dir (:obj:`str` or :obj:`os.PathLike`, *optional*): + Path to a directory in which a downloaded pretrained model configuration should be cached if the + standard cache should not be used. + force_download (:obj:`bool`, *optional*, defaults to :obj:`False`): + Whether or not to force the (re-)download the model weights and configuration files and override the + cached versions if they exist. + resume_download (:obj:`bool`, *optional*, defaults to :obj:`False`): + Whether or not to delete incompletely received files. Will attempt to resume the download if such a + file exists. + proxies (:obj:`Dict[str, str]`, *optional*): + A dictionary of proxy servers to use by protocol or endpoint, e.g., `{'http': 'foo.bar:3128', + 'http://hostname': 'foo.bar:4012'}`. The proxies are used on each request. + revision(:obj:`str`, *optional*, defaults to ``"main"``): + The specific model version to use. It can be a branch name, a tag name, or a commit id, since we use a + git-based system for storing models and other artifacts on huggingface.co, so `revision` can be any + identifier allowed by git. + return_unused_kwargs (:obj:`bool`, *optional*, defaults to ``False``): + If ``False``, then this function returns just the final configuration object. + If ``True``, then this functions returns a ``Tuple(config, unused_kwargs)`` where *unused_kwargs* is a + dictionary consisting of the key/value pairs whose keys are not configuration attributes: i.e., the + part of ``kwargs`` which has not been used to update ``config`` and is otherwise ignored. + trust_remote_code (:obj:`bool`, *optional*, defaults to ``False``): + Whether or not to allow for custom models defined on the Hub in their own modeling files. This option + should only be set to ``True`` for repositories you trust and in which you have read the code, as it will + execute code present on the Hub on your local machine. 
+ kwargs(additional keyword arguments, *optional*): + The values in kwargs of any keys which are configuration attributes will be used to override the loaded + values. Behavior concerning key/value pairs whose keys are *not* configuration attributes is controlled + by the ``return_unused_kwargs`` keyword parameter. + + Examples: + + .. code-block:: python + + from transformers import AutoConfig + delta_config = AutoDeltaConfig.from_finetuned("DeltaHub/lora_t5-base-mrpc") + + """ + + kwargs["name_or_path"] = finetuned_model_name_or_path + + config_dict, _ = BaseDeltaConfig.get_config_dict(finetuned_model_name_or_path, **kwargs) + if "delta_type" in config_dict: + config_class = LAZY_CONFIG_MAPPING[config_dict["delta_type"]] + return config_class.from_dict(config_dict, **kwargs) + else: + # Fallback: use pattern matching on the string. + for pattern, config_class in LAZY_CONFIG_MAPPING.items(): + if pattern in str(finetuned_model_name_or_path): + return config_class.from_dict(config_dict, **kwargs) + + raise ValueError( + f"Unrecognized model in {finetuned_model_name_or_path}. " + f"Should have a `delta_type` key in the loaded config, or contain one of the following strings " + f"in its name: {', '.join(LAZY_CONFIG_MAPPING.keys())}" + ) + +### AutoModels below + +class _LazyAutoMapping(OrderedDict): + """ + " A mapping config to object (model or tokenizer for instance) that will load keys and values when it is accessed. + + Args: + + - config_mapping: The map model type to config class + - model_mapping: The map model type to model (or tokenizer) class + """ + + def __init__(self, config_mapping, model_mapping): + self._config_mapping = config_mapping + self._reverse_config_mapping = {v: k for k, v in config_mapping.items()} + self._model_mapping = model_mapping + self._extra_content = {} + self._modules = {} + + def __getitem__(self, key): + if key in self._extra_content: + return self._extra_content[key] + model_type = self._reverse_config_mapping[key.__name__] + if model_type not in self._model_mapping: + raise KeyError(key) + model_name = self._model_mapping[model_type] + return self._load_attr_from_module(model_type, model_name) + + def _load_attr_from_module(self, model_type, attr): + if model_type not in self._modules: + self._modules[model_type] = importlib.import_module(f".{model_type}", "opendelta.delta_models") + return getattribute_from_module(self._modules[model_type], attr) + + def keys(self): + mapping_keys = [ + self._load_attr_from_module(key, name) + for key, name in self._config_mapping.items() + if key in self._model_mapping.keys() + ] + return mapping_keys + list(self._extra_content.keys()) + + def get(self, key, default): + try: + return self.__getitem__(key) + except KeyError: + return default + + def __bool__(self): + return bool(self.keys()) + + def values(self): + mapping_values = [ + self._load_attr_from_module(key, name) + for key, name in self._model_mapping.items() + if key in self._config_mapping.keys() + ] + return mapping_values + list(self._extra_content.values()) + + def items(self): + mapping_items = [ + ( + self._load_attr_from_module(key, self._config_mapping[key]), + self._load_attr_from_module(key, self._model_mapping[key]), + ) + for key in self._model_mapping.keys() + if key in self._config_mapping.keys() + ] + return mapping_items + list(self._extra_content.items()) + + def __iter__(self): + return iter(self.keys()) + + def __contains__(self, item): + if item in self._extra_content: + return True + if not hasattr(item, "__name__") or item.__name__ 
not in self._reverse_config_mapping: + return False + model_type = self._reverse_config_mapping[item.__name__] + return model_type in self._model_mapping + + def register(self, key, value): + """ + Register a new model in this mapping. + """ + if hasattr(key, "__name__") and key.__name__ in self._reverse_config_mapping: + model_type = self._reverse_config_mapping[key.__name__] + if model_type in self._model_mapping.keys(): + raise ValueError(f"'{key}' is already used by a Transformers model.") + + self._extra_content[key] = value + + + +LAZY_DELTA_MAPPING = _LazyAutoMapping(DELTA_CONFIG_MAPPING, DELTA_MODEL_MAPPING) + + + +def get_values(model_mapping): + result = [] + for model in model_mapping.values(): + if isinstance(model, (list, tuple)): + result += list(model) + else: + result.append(model) + + return result + + +def getattribute_from_module(module, attr): + if attr is None: + return None + if isinstance(attr, tuple): + return tuple(getattribute_from_module(module, a) for a in attr) + if hasattr(module, attr): + return getattr(module, attr) + # Some of the mappings have entries model_type -> object of another model type. In that case we try to grab the + # object at the top level. + transformers_module = importlib.import_module("transformers") + return getattribute_from_module(transformers_module, attr) + + + +class AutoDeltaModel: + r""" + """ + _delta_model_mapping = LAZY_DELTA_MAPPING + def __init__(self, *args, **kwargs): + raise EnvironmentError( + f"{self.__class__.__name__} is designed to be instantiated " + f"using the `{self.__class__.__name__}.from_pretrained(pretrained_model_name_or_path)` or " + f"`{self.__class__.__name__}.from_config(config)` methods." + ) + + @classmethod + def from_config(cls, config, backbone_model, **kwargs): + r"""Automatically instantiates a delta model based on the :obj:`config`. The delta model correspond to the delta + :obj:`config` will be loaded and initialized using the arguments in :obj:`config`. + + .. note:: + Only using :meth:`from_config` method will not load the finetuned weight file (e.g., pytorch_model.bin). + Please use from_finetuned directly. + + Args: + config (:obj:`BaseDeltaConfig`): + backbone_model (:obj:`nn.Module`): + + Examples: + + .. code-block:: python + + config = AutoDeltaConfig.from_finetuned("DeltaHub/lora_t5-base_mrpc") + delta_model = AutoDeltaModel.from_config(config, backbone_model) + + """ + if type(config) in cls._delta_model_mapping.keys(): + model_class = cls._delta_model_mapping[type(config)] + return model_class.from_config(config, backbone_model, **kwargs) + + raise ValueError( + f"Unrecognized configuration class {config.__class__} for this kind of AutoModel: {cls.__name__}.\n" + f"Model type should be one of {', '.join(c.__name__ for c in cls._delta_model_mapping.keys())}." + ) + + @classmethod + def from_finetuned(cls, finetuned_model_name_or_path, backbone_model, *model_args, **kwargs): + r""" Automatically instantiated a delta model and load the finetuned checkpoints based on the + :obj:`finetuned_model_name_or_path`, which can either be a string pointing to a local path or a url pointint to + the delta hub. It will check the hash after loading the delta model to see whether the correct backbone and + delta checkpoint are used. + + Args: + finetuned_model_name_or_path (:obj:`str` or :obj:`os.PathLike`, *optional*): + Can be either: + + - A string, the *model id* of a finetuned delta model configuration hosted inside a model repo on + huggingface.co. 
Valid model ids can be located at the root-level, like ``Davin/lora``, or + namespaced under a user or organization name, like ``DeltaHub/lora_t5-base_mrpc``. + - A path to a *directory* containing a configuration file saved using the + :py:meth:`DeltaBase.save_finetuned` method, + e.g., ``./my_model_directory/``. + - A path or url to a saved configuration JSON *file*, e.g., + ``./my_model_directory/configuration.json``. + The last two option are not tested but inherited from huggingface. + + backbone_model (:obj:`nn.Module`): The backbone model to be modified. + model_args: Other argument for initialize the model. + + Example: + + .. code-block:: python + + delta_model = AutoDeltaModel.from_finetuned("DeltaHub/lora_t5-base-mrpc", backbone_model) + + """ + config = kwargs.pop("config", None) + + if not isinstance(config, BaseDeltaConfig): + config, kwargs = AutoDeltaConfig.from_finetuned( + finetuned_model_name_or_path, return_unused_kwargs=True, **kwargs + ) + if type(config) in cls._delta_model_mapping.keys(): + model_class = cls._delta_model_mapping[type(config)] + return model_class.from_finetuned(finetuned_model_name_or_path, backbone_model, *model_args, **kwargs) + raise ValueError( + f"Unrecognized configuration class {config.__class__} for this kind of AutoModel: {cls.__name__}.\n" + f"Model type should be one of {', '.join(c.__name__ for c in cls._model_mapping.keys())}." + ) + + + + + +if __name__ == "__main__": + + config = AutoDeltaConfig.from_dict({"delta_type":"lora", "lora_r": 7}) + + + from transformers import AutoModelForSequenceClassification + model = AutoModelForSequenceClassification.from_pretrained("../../plm_cache/roberta-base/", num_labels=2) + # from IPython import embed + delta_model = AutoDeltaModel.from_config(config, model) + delta_model.freeze_module(exclude = ['deltas','classifier'], set_state_dict = True) + + + # delta_model.save_finetuned("autodelta_try", push_to_hub=True, private=True) + delta_model = AutoDeltaModel.from_finetuned("ShengdingHu/autodelta_try", model, use_auth_token=True) + + + + diff --git a/opendelta/basemodel.py b/opendelta/basemodel.py new file mode 100644 index 0000000..e7dea09 --- /dev/null +++ b/opendelta/basemodel.py @@ -0,0 +1,718 @@ + + +from collections import OrderedDict +from multiprocessing.sharedctypes import Value +import os +from opendelta.delta_configs import BaseDeltaConfig +from opendelta.utils.model_md5 import gen_model_hash +from opendelta.utils.signature import get_arg_names, signature +from typing import Optional, Union +from opendelta.utils.cuda import get_device +from opendelta.utils.name_based_addressing import * +import torch.nn as nn +import torch +from functools import wraps +# from decorator import decorate +from opendelta.utils.decorate import decorate +from opendelta.utils.structure_mapping import transform +from transformers.file_utils import PushToHubMixin +from transformers.deepspeed import deepspeed_config, is_deepspeed_zero3_enabled +from opendelta import SaveLoadMixin +from opendelta import logging +from opendelta.utils.structure_mapping import CommonStructureMap +from opendelta.utils.interactive.web import interactive +from opendelta.utils.data_parallel import new_replicate_for_data_parallel +logger = logging.get_logger(__name__) + +def is_leaf_module(module): + r"""Whether the module is a leaf module + """ + try: + return len([n for n,_ in module.named_children()]) == 0 + except: + from IPython import embed + embed() + +def non_module_param(module: nn.Module): + module_names = [n for n, _ in 
module.named_modules()] + ret = [] + for n, p in module.named_parameters(): + if not is_child_key(n, module_names): + ret.append((n,p)) + return ret + + + + + +class DeltaBase(nn.Module, SaveLoadMixin): + r"""This is the base class for all delta models. It provides four simple but effective functionalities + for building the delta model: + + #. addressing a module inside the backbone model using a minimal description key. + #. provide the interface for modifying and inserting model which keeps the docs/IO the same as the module + before modification. + #. pass a pseudo input to determine the inter dimension of the delta models. + #. freeze a part of model parameters according to key. + + It also provides unified interface for model loading and saving. + + Class attributes (overridden by derived classes): + + - delta_type (:obj:`str`): the name of the delta modules, used to create the correct :class:`opendelta.AutoDeltaModel`. + - config_class (:class:`BaseDeltaConfig`): The corresponding config model + + + Args: + backbone_model (:obj:`nn.Module`, *required*): backbone model that the delta models are build opon. The modification to the + backbone model are in place. + modified_modules (:obj:`List[str]`, *optional*, default to :obj:`None`): The modules are subjected to update. + + .. note:: + leave this argument :obj:`None` will make the delta model return to the default setting, which add the delta + models to the position experimented the paper. In this setting, the common structure mapping is loaded to + addressing the corresponding modules. + + registraction_name (:obj:`str`, *optional*, default to ``"deltas"``): The root name of the delta models when + attached to the backbone model. + common_structure (:obj:`bool`, *optional*, default to :obj:`None`): Whether use the common structure mapping to specify the + modified_modules. i.e., if common_structure=True, then we use a common ["attn"] for attention module in different models. + We DO NOT recommend manually set ``common_structure`` to ``true`` by yourself unless you are using delta + among multiple backbones and don't want to modify the code. + + interactive_modify (:obj:`bool` or :obj:`int`, *optional*, default to :obj:`None`): Whether to use interactive modification. + By setting to :obj:`int` can specify the port of web server. + """ + delta_type = "" + default_modified_modules = [] + config_class = BaseDeltaConfig + default_unfrozen_modules = ["deltas"] + def __init__(self, + backbone_model: nn.Module, + modified_modules: Optional[List[str]] = None, + unfrozen_modules: Optional[List[str]] = None, + interactive_modify: Optional[Union[bool, int]] = False, + common_structure = False, + ): + nn.Module.__init__(self) + # register the backbone model after init using self.__dict__ method to avoid adding backbone_model + # to the modules of the delta model. 
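+        # (Assigning through ``self.__dict__`` bypasses ``nn.Module.__setattr__``, so the backbone is not
+        # registered as a child module; its parameters therefore do not show up in this delta model's
+        # ``parameters()`` or ``state_dict()``.)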
+ self.__dict__["backbone_model"] = backbone_model + if modified_modules is None: + if interactive_modify: + if isinstance(interactive_modify, bool) and interactive_modify==True: + self.modified_modules = interactive(backbone_model) + else: + self.modified_modules = interactive(backbone_model, port=interactive_modify) + self.common_structure = False + else: + self.modified_modules = self.default_modified_modules + self.common_structure = True + else: + if interactive_modify: + raise ValueError("Use modified_modules and interactive_modify at the same time is not supported") + self.modified_modules = modified_modules + self.common_structure = common_structure + if self.common_structure: + self.structure_mapping = CommonStructureMap.load(self.backbone_model) + else: + self.structure_mapping = None + if unfrozen_modules is None: + self.unfrozen_modules = self.default_unfrozen_modules + if self.common_structure and self.structure_mapping is None: + raise RuntimeError("Using common structure but the structure mapping is None") + + def forward(self, *args, **kwargs) -> "RuntimeError": + r""" + .. warning:: + + Removed method. As the model is a delta model, which should be attached to a backbone model \ + and can't forward any data by itself. Please using the backbone model's forward function \ + after attach the delta model to the backbone. + """ + raise RuntimeError("This is a delta model, which should be attached to a backbone model \ + and can't forward any data by itself. Please using the backbone model's forward function \ + after attach the delta model to the backbone. ") + + @classmethod + def from_config(cls, config: Union[BaseDeltaConfig, dict], backbone_model: nn.Module, check_hash=True, **kwargs): + r"""Initialize a delta model from a config object or a dict containing the configs. To temperarily change + a value in the config, pass it through kwargs. If the config has a backbone model's hash, which means it is + a finetuned delta model's config, then we will compare the hash in the config and the newly caculated to ensure + the finedtuned delta model is trained on the passed backbone_model. Pass ``check_hash=False`` to disable the + checking. + + Args: + config (:obj:`BaseDeltaConfig` or :obj:`dict`) A config object or a dict that contains the necessary value to + initialize the delta model. + backbone_model (:obj:`nn.Module`) A pytorch module that will be pass into the delta model as the backbone + model. modifications will be made in place in the backbone model. + check_hash (:obj:`bool`, default to ``True``) Whether to check hash of the backbone model and the config's + backbone hash. + kwargs: Any configurations that are passed to update the config object. #TODO unit test needed. + """ + supported_keys = get_arg_names(cls.__init__) + get_arg_names(DeltaBase.__init__) + config_dict = config.to_dict() + for key in list(config_dict.keys()): + if key not in supported_keys: + config_dict.pop(key) + return cls(backbone_model, **config_dict) + + + def add_all_delta_to_backbone(self, + backbone: nn.Module, + modified_modules: List[str], + ) -> nn.Module: + r"""The main function to add delta models to the backbone model based on the :obj:`modified_modules`. + + + Args: + backbone_model (:obj:`nn.Module`, *required*) backbone model that the delta models are build opon. The + modification to the backbone model are in place. + modified_modules (:obj:`List[str]`, *optional*, default to :obj:`None`) The modules are subjected to update. 
+ leave this argument :obj:`None` will make the delta model return to the default setting, which add the delta + models to the position experimented the paper. In this setting, the common structure mapping is loaded to + addressing the corresponding modules. + + Returns: + :obj:`nn.Module` The modified backbone model. + + """ + self.plm_total_params = sum(p.numel() for p in backbone.parameters()) + # create a new key list to avoid recursion. + backbone_key_list = [key for key, _ in backbone.named_modules()] + for key in backbone_key_list: + if self.find_key(key, modified_modules): #TODO may have bugs when commonstructure has a virtual node and it's refered + logger.debug("find key: {}".format(key)) + self.update_module(backbone, key) + self._pseudo_data_to_instantiate(backbone) + # mark the paratmers that are the delta parameters for easily displaying the delta_paramters. + self.mark_as_delta() + return backbone + + + + def mark_as_delta(self, module: nn.Module=None,): + r"""[NODOC] Mark :obj:`module`'s all parameters as delta parameters by setting a ``_is_delta`` attribute to each of them. + Generally, it is used after creating the delta modules. By leaving module to :obj:`None`, it will mark all the parameters in the + delta model as ``_is_delta``. + + Args: + module (:obj:`nn.Module`): The module to mark as delta. + """ + if module is None: + module=self # all the parameters in the delta model. + for p in module.parameters(): + setattr(p, "_is_delta", True) + + def update_module(self, module: nn.Module, key: str): + r"""Update a module specified by :obj:`key`. The method is reimplemented in each specific delta model. + """ + raise NotImplementedError + + + def freeze_module(self, + module: Optional[nn.Module] = None, + exclude: Optional[List[str]] = None, + set_state_dict: Optional[bool]=True, + ): + r"""Freeze the parameters of plm. Leave the parameters in exclude untouched. + deltas module is filtered with ``_is_delta`` attributes because it may have parameter sharing to the main + model, (e.g., bias term) + + Args: + module (:obj:`nn.Module`, *optional*, default to :obj:`None`): The module of which some parts are frozen. + If left with :obj:`None`, the function will the self.backbone_model as the module to be frozen. + exclude (:obj:`List[str]`, *optional*, default to ``["deltas"]``): The parameters that don't need to + be freezed. Default to all the delta parameters. + set_state_dict (:obj:`bool`, *optional*, default to :obj:`True`): Whether setting the backbone model's state + dict to all the parameters that still need grad. + prefix (:obj:`str`, *optional*, default to ``""``): A parameters that are used for recursive frozen. + Should not be changed by passing argument other than ``""``. + + """ + if exclude is None: + exclude = self.unfrozen_modules + + if module is None: + module = self.backbone_model + self._freeze_module_recursive(module, exclude, "") # modify the active state dict that still need grad + if set_state_dict: + self.set_active_state_dict(module) + + def _freeze_module_recursive(self, + module: Optional[nn.Module] = None, + exclude: Optional[List[str]] = None, + prefix=""): + r"""[NODOC] Freeze the parameters of plm. Leave the parameters in exclude untouched. + deltas module is filtered with ``_is_delta`` attributes because it may have parameter sharing to the main + model, (e.g., bias term) + + Args: + module (:obj:`nn.Module`, *optional*, default to :obj:`None`): The module of which some parts are frozen. 
+ If left with :obj:`None`, the function will the self.backbone_model as the module to be frozen. + exclude (:obj:`List[str]`, *optional*, default to ``["deltas"]``): The parameters that don't need to + be freezed. Default to all the delta parameters. + set_state_dict (:obj:`bool`, *optional*, default to :obj:`True`): Whether setting the backbone model's state + dict to all the parameters that still need grad. + prefix (:obj:`str`, *optional*, default to ``""``): A parameters that are used for recursive frozen. + Should not be changed by passing argument other than ``""``. + + """ + + if is_leaf_module(module): + for n, p in module.named_parameters(): + if self.find_key(".".join([prefix,n]), exclude, only_tail=True): + continue + if "deltas" not in exclude or (not (hasattr(p, "_is_delta") and getattr(p, "_is_delta"))): + p.requires_grad = False + return + else: + for n, c in module.named_children(): + if self.find_key(".".join([prefix,n]), exclude, only_tail=True): # if found, untouch the parameters + continue + else: # firstly freeze the non module params, then go deeper. + params = non_module_param(module) + for n, p in params: + if "deltas" not in exclude or (not (hasattr(p, "_is_delta") and getattr(p, "_is_delta"))): + p.requires_grad = False + self._freeze_module_recursive(c, exclude=exclude, prefix=".".join([prefix,n]) ) + + + + + + def find_key(self, key: Union[str, re.Pattern], target_list: List[str], only_tail=True): + r"""Check whether any target string is in the key or in the tail of the key, i.e., + + Args: + key (Union[:obj:`str`, :obj:`re.Pattern`]): The key (name) of a submodule in a ancestor module. + E.g., model.encoder.layer.0.attention + target_list (List[:obj:`str`]): The target list that we try to match ``key`` with. E.g., ["attention"] + only_tail (:obj:`bool`): the element in the target_list should be in the tail of key + + Returns: + :obj:`bool` True if the key matchs the target list. + """ + if self.common_structure: + key = self.structure_mapping.transform(key, strict=False) + if not key: + return False + try: + if isinstance(key, re.Pattern): # TODO: unit test needed ERROR + if only_tail: + return endswith_in_regex(key, target_list) + else: + return substring_in_regex(key, target_list) + else: + if only_tail: + return endswith_in(key, target_list) + else: + return substring_in(key, target_list) + except: + from IPython import embed + embed(header = "exception") + + def _pseudo_data_to_instantiate(self, module: Optional[nn.Module]=None): + r"""Create a pseudo_data into the module to know the dimemsion of each tensor in the computation graph. + First try to use the dummy_inputs of the pretrained model. If the model has no dummy_inputs, will try to create + integer tensor as the pseudo_input, if ``decoder_input_ids`` is in the model's forward function, additional create it. + + Args: + module (:obj:`nn.Module`, *optional*, default to :obj:`None`): The backbone model. 
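+
+        Example (an illustrative sketch of the fallback path when the backbone has no ``dummy_inputs``):
+
+        .. code-block:: python
+
+            pseudo_input = torch.tensor([[0, 0]]).to(get_device(module))
+            module(pseudo_input)  # the forward pass lets lazily-built delta layers read the hidden size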
+ + """ + if module is None: + module = self.backbone_model + try: + dummy_inputs = module.dummy_inputs + module(**dummy_inputs) + except AttributeError: + device = get_device(module) + logger.warning("No dummy_inputs attributes, create a common input_ids for input.") + pseudo_input = torch.tensor([[0,0]]).to(device) + if "decoder_input_ids" in signature(module.forward).args: + module(pseudo_input, decoder_input_ids = pseudo_input) + else: + module(pseudo_input) + + def trainable_parameters_names(self, module: Optional[nn.Module]=None): + r"""[NODOC] A small sugar function to return all the trainable parameter's name in the (by default, backbone) model. + + Args: + module (:obj:`nn.Module`): of which module we want to know the trainable paramemters' name. + + Returns: + :obj:`List[str]` + """ + if module is None: + module = self.backbone_model + return [n for n,p in module.named_parameters() if p.requires_grad] + + def frozen_parameters_names(self, module: Optional[nn.Module]=None): + r"""[NODOC] A small sugar function to return all the frozen parameters' name in the (by default, backbone) model. + + Args: + module (:obj:`nn.Module`): of which module we want to know the frozen paramemters' name. + + Returns: + :obj:`List[str]` + """ + if module is None: + module = self.backbone_model + return [n for n,p in module.named_parameters() if not p.requires_grad] + + def trainable_parameters(self,module: Optional[nn.Module]=None): + r"""[NODOC] A small sugar function to return all the frozen parameters in the (by default, backbone) model. + + Args: + module (:obj:`nn.Module`): of which module we want to know the frozen paramemters. + + Returns: + :obj:`List[nn.Parameter]` + """ + if module is None: + module = self + return [p for n,p in module.named_parameters() if p.requires_grad] + + + def num_trainable_parameters(self, module: Optional[nn.Module]=None): + r"""[NODOC] A small sugar function to get the number of trainable parameter in the backbone model. Often used to + compute the trainable rate. + + Args: + module (:obj:`nn.Module`): of which module we want to know the number of trainable paramemters. + + Returns: + :obj:`List[nn.Parameter]` + """ + if module is None: + module = self + pnum_tot = 0 + for param in module.parameters(): + if param.requires_grad: + pnum_tot += param.numel() + return pnum_tot + + def num_total_parameters(self, module: Optional[nn.Module]=None): + r"""[NODOC] A small sugar function to get the number of trainable parameter in the backbone model. Often used to + compute the trainable rate. + + Args: + module (:obj:`nn.Module`): of which module we want to know the number of trainable paramemters. + + Returns: + :obj:`List[nn.Parameter]` + """ + if module is None: + module = self + pnum_tot = 0 + for param in module.parameters(): + pnum_tot += param.numel() + return pnum_tot + + + + def find_module(self, root_module: nn.Module, key:str): + r"""Find the module using a key and the root module. Return both the parent reference, the child name and reference. + + Args: + root_module (:obj:`root_module`): The root_module to find the sub module in + key (:obj:`str`): The relative key to the root module. + + Returns: + (:obj:`nn.Module`, :obj:`str`, :obj:`nn.Module`): + * A reference to the parent module of the target module, mainly for substuting the target module. + * The key of the target module relevant to its parent module + * Target module. 
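+
+        Example (illustrative; the key must name a real submodule of your backbone):
+
+        .. code-block:: python
+
+            parent, name, child = delta_model.find_module(backbone_model, "encoder.layer.0.attention")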
+ """ + sub_keys = key.split(".") + parent_module = root_module + for sub_key in sub_keys[:-1]: + parent_module = getattr(parent_module, sub_key) + module = getattr(parent_module, sub_keys[-1]) + return parent_module, sub_keys[-1], module + + def _register_delta_infos(self, parent_module, _delta_info): + r"""Register the delta infomation. + Automatically incrementing the suffix for repeated delta_names + """ + _delta_infos = getattr(parent_module, "_delta_infos", []) + if len(_delta_infos) > 0: # check if duplicated name + list_of_deltas = [d['delta_name'] for d in _delta_infos] + cur_name = _delta_info['delta_name'] + if cur_name in list_of_deltas: + cur_name = cur_name + "_1" + counter = 1 + while cur_name in list_of_deltas: + counter += 1 + cur_name = cur_name.split("_")[0] + "_"+str(counter) + _delta_info["delta_name"] = cur_name + _delta_infos.append(_delta_info) + setattr(parent_module, "_delta_infos", _delta_infos) + + def replace_module(self, + parent_module: nn.Module, + child_name: str, + child_module: nn.Module, + new_module: nn.Module, + delta_name: Optional[str] = "delta", + ): + r"""Replace a module's child module with the new_module(a delta module). Used by delta method based on direct + replacement, such as :class:`opendelta.delta_modules.lora.LoraModel`. + + Args: + parent_module (:obj:`nn.Module`): The parent module of the replacement. + child_name (:obj:`str`): The chird module's name, i.e., parent_module.child_name give us child_module + child_module (:obj:`nn.Module`): The original child module. + new_module (:obj:`nn.Module`): The delta module. + delta_name (:obj:`str`, *optional*, default ot ``delta``): The name of the delta module, used for recording. + parent_module.delta_name WILL NOT give you the delta module. + """ + self.delta_modules.append(new_module) + setattr(parent_module, child_name, new_module) + # register delta info + _delta_info = {"method": "replace", + "delta_module": new_module, + "child_name": child_name, + "org_module": child_module, + "delta_name": delta_name, + "delta_belong": self, + "state": "on"} + self._register_delta_infos(parent_module=parent_module, + _delta_info = _delta_info, + ) + + + def modify_module(self, module: nn.Module): + r"""Modify the inside parameteres of a module. This method will be reimplemented in different + derived class if needed. + """ + raise NotImplementedError + + def insert_sequential_module(self, module, delta_module=None, name='delta', strict=False, _delta_info=None): + r"""insert a module (previous not exists in the code base) before/after a module. Specifically, it modifies the forward + function of the original module to firstly pass the arguments into the new module's forward function and then pass + it into the original ones. The new module can also be inserted after the original module with similar mechanism. + + When implementing the new module , researchers should be aware of the components of arguments of the original module's forward function. + + Args: + module: (:obj:`nn.Module`): The (sub)module to inserted a delta module. + delta_module: (:obj:`DeltaBase`): The delta module to be inserted. + name: (:obj:`str`, *optional*): The name of the delta in the backbone module. + strict: (:obj:`bool`, *optional*): Whether to prohibit modify a modified module. + _delta_info (:obj:`Dict`, *optional*): Used in attach(), reattach a delta module to backbone. The info of + original delta is passed through ``_delta_info``. 
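+
+        Example (a sketch; ``adapter_layer`` stands for any module exposing ``pre_forward``/``post_forward``):
+
+        .. code-block:: python
+
+            delta_model.insert_sequential_module(backbone_model.encoder.layer[0],
+                                                 delta_module=adapter_layer,
+                                                 name="adapter")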
+ + """ + def _caller(_org_func, org_module, delta_name, *args, **kwargs): + args = args[1:] # the first argument here is ``self`` + delta_module = getattr(org_module, delta_name) + if hasattr(delta_module, "pre_forward"):# is not None: + args, kwargs = delta_module.pre_forward(*args, **kwargs) + # from IPython import embed + # embed(header = "true") + ret = _org_func(*args, **kwargs) + if hasattr(delta_module, "post_forward"):# is not None: + ret = delta_module.post_forward(ret) + return ret + + + if strict: + if hasattr(module.forward, "__wrapped__"): + raise RuntimeWarning("The forward function might have been wrapped by a decorator, is it intended?") + + # record info for plug and unplug and nested wrap + if _delta_info is None: + if delta_module is None: + raise RuntimeError("delta module can't be none to ensure successful replicate of the parent module.") + + _delta_info = {"method": "insert_sequential", + "delta_module": delta_module, + "delta_name": name, + "delta_belong": self, + "state": "on"} + self._register_delta_infos(parent_module=module, + _delta_info = _delta_info) + else: + delta_module = _delta_info["delta_module"] + name = _delta_info["delta_name"] + + setattr(module, _delta_info['delta_name'], _delta_info["delta_module"]) + + new_forward = decorate(module.forward, _caller, extras=(module, _delta_info['delta_name']), kwsyntax=True) # decorator.decorate helps preserving the functions metadata (signature, etc.). + module.forward = new_forward.__get__(module, type(module)) # func.__get__(object, type(object)) register a function as an object's method + # for DataParallel's copy behavior. Experimental: + # may have bugs when module.forward is nestedly wrapped. + module._replicate_for_data_parallel = new_replicate_for_data_parallel.__get__(module, type(module)) + + + + + + def insert_parrellel_module(self, module, pre_caller=None, post_caller=None, delta_module=None, name='delta'): + """insert a module (previous not exists in the code base) across a module. Specifically, it modifies the forward + function of the original module to firstly pass the arguments into the delta model's forward function and set + aside the calculation result. Then combine it with the calculation result output from the backbone module. + + When implementing the new module , researchers should be aware of the arguments and keywards of the original module's forward function. + + # TODO: currently not in use. + """ + raise NotImplementedError + + def set_active_state_dict(self, module: nn.Module): + r"""modify the state_dict function of the model (by default, the backbone model) to return only the tunable part. + + Args: + module (:obj:`nn.Module`): The module modified. The modification is in-place. + """ + def _caller(_org_func, includes, *args, **kwargs): + state_dict = _org_func(*args, **kwargs) + keys = list(state_dict.keys()) + for n in keys: + if n not in includes: + state_dict.pop(n) + return state_dict + includes = self.trainable_parameters_names(module) # use excludes will have trouble when the model have shared weights + # print(includes, "grad:",self.backbone_model.plm.lm_head.weight.requires_grad) + # exit() + if hasattr(module.state_dict, "__wrapped__"): + raise RuntimeWarning("The forward function might have been wrapped by a decorator, is it intended?") + module.state_dict = decorate(module.state_dict, _caller, extras=(includes,), kwsyntax=True) # decorator.decorate helps preserving the functions metadata (signature, etc.). 
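+    # Note (illustrative): once ``set_active_state_dict`` has wrapped a backbone, e.g. through
+    # ``freeze_module(..., set_state_dict=True)``, ``backbone.state_dict()`` returns only the entries
+    # that still require grad, so ``torch.save(backbone.state_dict(), "delta_ckpt.bin")`` keeps the
+    # checkpoint small ("delta_ckpt.bin" is a placeholder file name).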
+ + def _load_state_dict_into_backbone(self, backbone_model: nn.Module = None, state_dict: dict = {}): + r"""[NODOC] + """ + if backbone_model is None: + backbone_model = self.backbone_model + self.backbone_model.load_state_dict(state_dict, strict=False) + + def create_config_from_model(self, ): + r"""[NODOC] If the delta model was built by directly passing arguments, instead of passing a config object. + create the config of the delta model for saving the delta model. + """ + # common_attributes + config = self.config_class() + config_keys = signature(config.__init__)[0] + signature(super(self.config_class, config).__init__)[0] + + for key in config_keys: + val = getattr(self, key) if hasattr(self, key) else None + setattr(config, key, val) + config.delta_type = self.delta_type + self.config = config + + + def log(self, module=None, delta_ratio=True, trainable_ratio=True, visualization=True): + r"""Log and visualize the result of applying delta. + Possible Options are ``trainable_ratio``, + ``visualization``, ``delta_ratio``. + + Args: + delta_ratio (:obj:`bool`, *optional*): Whether computing the ratio of parameters in the delta modules. + trainable_ratio (:obj:`bool`, *optional*): Whether computing the ratio of trainable parameters. + visualization (:obj:`bool`, *optional*): Whether visualize the parameter information of the modified backbone. + + """ + if module is None: + module = self.backbone_model + + + if visualization: + from opendelta import Visualization + Visualization(module).structure_graph() + if trainable_ratio: + n_trainable = self.num_trainable_parameters(module) + n_total = self.num_total_parameters(module) + logger.info("Trainable Ratio: {:2f}%".format(n_trainable/n_total*100)) + if delta_ratio: + n_delta = self.num_delta_parameters(module) + n_total = self.num_total_parameters(module) + logger.info("Delta Parameter Ratio: {:2f}%".format(n_delta/n_total*100)) + + def num_delta_parameters(self, module: Optional[nn.Module]=None): + r"""[NODOC] A small sugar function to get the number of trainable parameter in the backbone model. Often used to + compute the trainable rate. + + Args: + module (:obj:`nn.Module`): of which module we want to know the number of trainable paramemters. + + Returns: + :obj:`List[nn.Parameter]` + """ + if module is None: + module = self.backbone_model + pnum_tot = 0 + for param in module.parameters(): + if hasattr(param, "_is_delta"): + pnum_tot += param.numel() + return pnum_tot + + # Two functions for plug and remove the delta model. + def attach(self, module: Optional[nn.Module]=None,): + r"""Reattach the delta modules to the backbone. Note that this method can not be used to create new delta modules. + Instead, a :meth:`DeltaBase.detach` should precede this method. + + Args: + module (:obj:`object`, *optional*, default to :obj:`None`): The backbone module that we + reattach the deltas to. 
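+
+        Example (illustrative):
+
+        .. code-block:: python
+
+            delta_model.detach()  # temporarily turn the delta modules off
+            delta_model.attach()  # turn them back on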
+ """ + + if module is None: + module = self.backbone_model + + for name, submodule in module.named_modules(): + if hasattr(submodule, "_delta_infos"): + _delta_infos = getattr(submodule, "_delta_infos") + for _delta_info in _delta_infos: + if _delta_info['delta_belong'] is not self: + continue + if _delta_info["state"] == "on": + continue + + if _delta_info['method'] == "replace": + setattr(submodule, _delta_info["child_name"], _delta_info['delta_module']) + elif _delta_info['method'] == "insert_sequential": + self.insert_sequential_module(module=submodule, + _delta_info=_delta_info) + else: + raise NotImplementedError + + _delta_info['state'] = "on" + + + def detach(self, module: Optional[nn.Module]=None,): + r"""Detach the delta module from the backbone. The delta module is not deleted, but temporarily turned off. + Use :meth:`DeltaBase.attach` to reattach the delta model to the backbone. + + Args: + module (:obj:`object`, *optional*, default to :obj:`None`): The backbone module that we + detached the deltas from. + """ + + if module is None: + module = self.backbone_model + + for name, submodule in module.named_modules(): + if hasattr(submodule, "_delta_infos"): + _delta_infos = getattr(submodule, "_delta_infos") + for _delta_info in _delta_infos: + if _delta_info['delta_belong'] is not self: + continue + if _delta_info["state"] == "off": + continue + + if _delta_info['method'] == "replace": + setattr(submodule, _delta_info["child_name"], _delta_info['org_module']) + elif _delta_info['method'] == "insert_sequential": + if hasattr(submodule.forward, "__wrapped__"): + submodule.forward = submodule.forward.__wrapped__ + delattr(submodule, _delta_info["delta_name"]) + else: + raise AttributeError("submodule {}'s forward has no attribute __wrapped__. It'ss not a wrapped function.".format(name)) + else: + raise NotImplementedError + + _delta_info['state'] = "off" + diff --git a/opendelta/delta_configs.py b/opendelta/delta_configs.py new file mode 100644 index 0000000..3958da1 --- /dev/null +++ b/opendelta/delta_configs.py @@ -0,0 +1,476 @@ +import os +import re +from typing import Union, Dict, Any, Tuple, Optional +from opendelta import __version__ as opendelta_version +from opendelta.utils import logging +from opendelta.utils.signature import get_arg_names, get_arg_names_inside_func +import transformers +from transformers.file_utils import ( + PushToHubMixin, + is_offline_mode, + cached_path, + is_remote_url, + get_list_of_files, + hf_bucket_url, +) +from packaging import version +import json +import copy + +CONFIG_NAME = "config.json" +transformers_version = transformers.__version__ + +checked_package_versions = ["transformers_version", "opendelta_version"] + +logger = logging.get_logger(__name__) +FULL_CONFIGURATION_FILE = "config.json" +_re_configuration_file = re.compile(r"config\.(.*)\.json") + +class BaseDeltaConfig(PushToHubMixin): + r"""Base class for all configuration classes. Handles a few + parameters common to all delta models' configurations as well as methods for loading/downloading/saving configurations. + + Class attributes (overridden by derived classes): + + - **delta_type** (:obj:`str`) -- the name of the delta modules, used to create the correct :py:class:`~opendelta.AutoConfig`. + + Args: + modified_modules (:obj:`List[str]`, *optional*, defaults to :obj:``None``) + The list of keys to determine which modules you want to modify. OpenDelta will take every modulees that + **ends with** the one of the provided keys as the modification target. When not given any value, i.e. 
+ ``modified_modules=None``, the delta module will use the it corresponding default modification modules. + Taking DistilBertModel with an classifier on top as an example: + + .. note:: + **Examples**: When adding delta to DistilBertModel, + + 1. set to ``["0.attention.out_lin"]`` will add delta modules to the attention output of distilbert's + ayer 0, i.e., ``distilbert.transformer.layer.0.attention.out_lin``. + + 2. set to ``["attention.out_lin"]`` will add the delta modules in every layer's ``attention.out_lin``. + + unfrozen_modules (:obj:`List[str]`, *optional*, defaults to :obj:`["deltas"]` ) + The modules that are unfrozen + during training. Including the ones that are newly introduced as delta modules, and the ones that are + originally a part of the model but set to trainable (:obj:`requires_grad=True`) to train together with the + delta modules. OpenDelta will take every modules that **ends with** the one of the provided keys and all + its sub-modules and paramters as trainable. + + .. note:: + **Examples**: When adding delta to DistilBertModel, + + 1. set this argument to ``["bias"]`` will make all bias terms tunable. + + 2. set this argument to ``["attention"]`` will make all parameters in all attention modules tunable. + + 3. set this argument to ``["deltas"]`` will make all the parameters in the newly introduced delta + modules tunable. + + 4. set this argument to ``["classifier"]`` will make all parameters in the classifier tunable. + + 5. set this argument to ``["3.ffn.lin2", "deltas", "classifier"]``, will make all parameters in + the third layer's feed forward layer's send linear layer, the detla modules, and the classifiers modules + tunable. + + common_structure (:obj:`bool`, *optional*, default to :obj:`None`): Whether using the common structure mapping of + the transformer model when designating :obj:`modified_modules` and :obj:`unfrozen_modules`. + backbone_class (:obj:`str`, *optional*, default to :obj:`None`): The name of backbone model's class, e.g. + ``RobertaForMaskedLM``. Saving this infomation let the users explicitly know on which backbone the + delta model is trained. + backbone_checkpoint_name (:obj:`str`, *optional*, default to :obj:`None`): The specific checkpoint of the model. + In ideal case, it should be the url to download the checkpoint. However, we do not force the user to + specify a downloadable url here. + backbone_hash (:obj:`str`, *optional*, default to :obj:`None`): The md5-hash of the backbone model. It is + calculated using the string representation of the model and the sequential expansion of all the + parameters in the model. When loading a delta checkpoint in strict mode, the hash of the backbone model + will be compared to the hash in this config. + """ + delta_type: str = "" + + + def __init__(self, + modified_modules = None, + unfrozen_modules = ["deltas"], + common_structure=False, + backbone_class = None, + backbone_checkpoint_name = None, + backbone_hash = None, + ): + arg_names = get_arg_names(BaseDeltaConfig.__init__) + for arg_name in arg_names: + setattr(self, arg_name, locals()[arg_name]) + + + + + @classmethod + def from_finetuned(cls, finetuned_model_name_or_path: Union[str, os.PathLike], **kwargs) -> "BaseDeltaConfig": + r""" + Instantiate a :obj:`BaseDeltaConfig` (or a derived class) from a finetined delta module configuration. 
+ + Args: + finetuned_model_name_or_path (:obj:`str` or :obj:`os.PathLike`): This can be either: + + * a string, the *model id* of a finetuned delta model configuration hosted inside a model repo on + deltahub.co. Valid model ids can be located at the root-level, like ``bert-base-uncased``, or + namespaced under a user or organization name, like ``dbmdz/bert-base-german-cased``. + + * a path to a *directory* containing a configuration file saved using the :meth:`BaseDeltaConfig.save_finetuned` method, e.g., ``./my_model_directory/``. + + * a path or url to a saved configuration JSON *file*, e.g., ``./my_model_directory/configuration.json``. + + cache_dir (:obj:`str` or :obj:`os.PathLike`, *optional*): + Path to a directory in which a downloaded pretrained delta model configuration should be cached if the + standard cache should not be used. + + .. code-block:: python + + delta_config = LoraConfig.from_finetuned("DeltaHub/lora_t5-base_mrpc") + + """ + config_dict, kwargs = cls.get_config_dict(finetuned_model_name_or_path, **kwargs) + if "model_type" in config_dict and hasattr(cls, "model_type") and config_dict["model_type"] != cls.model_type: + logger.warn( + f"You are using a model of type {config_dict['model_type']} to instantiate a model of type " + f"{cls.model_type}. This is not supported for all configurations of models and can yield errors." + ) + + return cls.from_dict(config_dict, **kwargs) + + def save_finetuned(self, save_directory: Union[str, os.PathLike], push_to_hub: bool = False, **kwargs): + """ + Save a configuration object to the directory :obj:`save_directory`, so that it can be re-loaded using the + :meth:`BaseDeltaConfig.from_finetuned` class method. + + Args: + save_directory (:obj:`str` or :obj:`os.PathLike`): Directory where the configuration JSON file + will be saved (will be created if it does not exist). + push_to_hub (:obj:`bool`, *optional*, defaults to :obj:`False`): Whether or not to push your model to + the Hugging Face model hub after saving it. + + .. warning:: + 1. Will raise error if you haven't config a Huggingface Model Hub. + 2. Using ``push_to_hub=True`` will synchronize the repository you are pushing to with ``save_directory``, + which requires ``save_directory`` to be a local clone of the repo you are pushing to if it's an existing + folder. Pass along ``temp_dir=True`` to use a temporary directory instead. + + kwargs: + Additional key word arguments passed along to the + `PushToHubMixin.push_to_hub `_ method. + """ + if os.path.isfile(save_directory): + raise AssertionError(f"Provided path ({save_directory}) should be a directory, not a file") + + if push_to_hub: + commit_message = kwargs.pop("commit_message", None) + repo = self._create_or_get_repo(save_directory, **kwargs) + + os.makedirs(save_directory, exist_ok=True) + # If we save using the predefined names, we can load using `from_pretrained` + output_config_file = os.path.join(save_directory, CONFIG_NAME) + + self.to_json_file(output_config_file, use_diff=True) + logger.info(f"Configuration saved in {output_config_file}") + + if push_to_hub: + url = self._push_to_hub(repo, commit_message=commit_message) + logger.info(f"Configuration pushed to the hub in this commit: {url}") + + @classmethod + def from_dict(cls, config_dict: Dict[str, Any], **kwargs) -> "BaseDeltaConfig": + r""" + Instantiate a :obj:`BaseDeltaConfig` from a python dictionary of parameters. + + Args: + config_dict (:obj:`Dict[str, Any]`): + Dictionary that will be used to instantiate the configuration object. 
Such a dictionary can be + retrieved from a pretrained checkpoint by leveraging the :py:meth:`~PretrainedConfig.get_config_dict` method. + kwargs (:obj:`Dict[str, Any]`): + Additional parameters from which to initialize the configuration object. + Returns: + :obj:`BaseDeltaConfig`: The configuration object instantiated from those parameters. + """ + return_unused_kwargs = kwargs.pop("return_unused_kwargs", False) + accept_args = get_arg_names(cls.__init__) + get_arg_names(BaseDeltaConfig.__init__) + unused_config_keys = [] + for config_key in list(config_dict.keys()): + if config_key not in accept_args: + config_dict.pop(config_key) + unused_config_keys.append(config_key) + logger.warning(f"The following keys are not used by {cls}.__init__ function: {unused_config_keys}") + config = cls(**config_dict) + + + # Update config with kwargs if needed + to_remove = [] + for key, value in kwargs.items(): + if hasattr(config, key): + + setattr(config, key, value) + if key != "torch_dtype": + to_remove.append(key) + for key in to_remove: + kwargs.pop(key, None) + logger.info(f"Model config {config}") + + if return_unused_kwargs: + return config, kwargs + else: + return config + + @classmethod + def get_config_dict( + cls, pretrained_model_name_or_path: Union[str, os.PathLike], **kwargs + ) -> Tuple[Dict[str, Any], Dict[str, Any]]: + """[NODOC] + From a ``pretrained_model_name_or_path``, resolve to a dictionary of parameters, to be used for instantiating a + [``PretrainedConfig``] using ``from_dict``. + Parameters: + pretrained_model_name_or_path (:obj:`str` or :obj:`os.PathLike`): + The identifier of the pre-trained checkpoint from which we want the dictionary of parameters. + Returns: + :obj:`Tuple[Dict, Dict]`: The dictionary(ies) that will be used to instantiate the configuration object. 
+ """ + cache_dir = kwargs.pop("cache_dir", None) + force_download = kwargs.pop("force_download", False) + resume_download = kwargs.pop("resume_download", False) + proxies = kwargs.pop("proxies", None) + use_auth_token = kwargs.pop("use_auth_token", None) + local_files_only = kwargs.pop("local_files_only", False) + revision = kwargs.pop("revision", None) + # from_pipeline = kwargs.pop("_from_pipeline", None) + from_auto_class = kwargs.pop("_from_auto", False) + + user_agent = {"file_type": "config", "from_auto_class": from_auto_class} + # if from_pipeline is not None: + # user_agent["using_pipeline"] = from_pipeline + + if is_offline_mode() and not local_files_only: + logger.info("Offline mode: forcing local_files_only=True") + local_files_only = True + + pretrained_model_name_or_path = str(pretrained_model_name_or_path) + if os.path.isfile(pretrained_model_name_or_path) or is_remote_url(pretrained_model_name_or_path): + config_file = pretrained_model_name_or_path + else: + configuration_file = get_configuration_file( + pretrained_model_name_or_path, + revision=revision, + use_auth_token=use_auth_token, + local_files_only=local_files_only, + ) + + + if os.path.isdir(pretrained_model_name_or_path): + config_file = os.path.join(pretrained_model_name_or_path, configuration_file) + else: + config_file = hf_bucket_url( + pretrained_model_name_or_path, filename=configuration_file, revision=revision, mirror=None + ) + + try: + # Load from URL or cache if already cached + resolved_config_file = cached_path( + config_file, + cache_dir=cache_dir, + force_download=force_download, + proxies=proxies, + resume_download=resume_download, + local_files_only=local_files_only, + use_auth_token=use_auth_token, + user_agent=user_agent, + ) + # Load config dict + config_dict = cls._dict_from_json_file(resolved_config_file) + + except EnvironmentError as err: + logger.error(err) + msg = ( + f"Can't load config for '{pretrained_model_name_or_path}'. Make sure that:\n\n" + f"- '{pretrained_model_name_or_path}' is a correct model identifier listed on 'https://huggingface.co/models'\n" + f" (make sure '{pretrained_model_name_or_path}' is not a path to a local directory with something else, in that case)\n\n" + f"- or '{pretrained_model_name_or_path}' is the correct path to a directory containing a {CONFIG_NAME} file\n\n" + ) + + if revision is not None: + msg += f"- or '{revision}' is a valid git identifier (branch name, a tag name, or a commit id) that exists for this model name as listed on its model page on 'https://huggingface.co/models'\n\n" + + raise EnvironmentError(msg) + + except (json.JSONDecodeError, UnicodeDecodeError): + msg = ( + f"Couldn't reach server at '{config_file}' to download configuration file or " + "configuration file is not a valid JSON file. " + f"Please check network or file content here: {resolved_config_file}." 
+ ) + raise EnvironmentError(msg) + + if resolved_config_file == config_file: + logger.info(f"loading configuration file {config_file}") + else: + logger.info(f"loading configuration file {config_file} from cache at {resolved_config_file}") + + return config_dict, kwargs + + @classmethod + def _dict_from_json_file(cls, json_file: Union[str, os.PathLike]): + with open(json_file, "r", encoding="utf-8") as reader: + text = reader.read() + return json.loads(text) + + def __repr__(self): + return f"{self.__class__.__name__} {self.to_json_string()}" + + def __eq__(self, other): + return self.__dict__ == other.__dict__ + + def to_json_string(self, use_diff: bool = True) -> str: + """[NODOC] + Serializes this instance to a JSON string. + Args: + use_diff (:obj:`bool`, *optional*, defaults to :obj:`True`): + If set to :obj:`True`, only the difference between the config instance and the default ``PretrainedConfig()`` + is serialized to JSON string. + Returns: + :obj:`str`: String containing all the attributes that make up this configuration instance in JSON format. + """ + if use_diff is True: + config_dict = self.to_diff_dict() + else: + config_dict = self.to_dict() + return json.dumps(config_dict, indent=2, sort_keys=True) + "\n" + + def to_json_file(self, json_file_path: Union[str, os.PathLike], use_diff: bool = True): + """[NODOC] + Save this instance to a JSON file. + Args: + json_file_path (:obj:`str` or :obj:`os.PathLike`): + Path to the JSON file in which this configuration instance's parameters will be saved. + use_diff (:obj:`bool`, *optional*, defaults to :obj:`True`): + If set to :obj:`True`, only the difference between the config instance and the default ``PretrainedConfig()`` + is serialized to JSON file. + """ + with open(json_file_path, "w", encoding="utf-8") as writer: + writer.write(self.to_json_string(use_diff=use_diff)) + + def to_diff_dict(self) -> Dict[str, Any]: + """[NODOC] + Removes all attributes from config which correspond to the default config attributes for better readability and + serializes to a Python dictionary. + Returns: + :obj:`Dict[str, Any]`: Dictionary of all the attributes that make up this configuration instance, + """ + config_dict = self.to_dict() + + # get the default config dict + default_config_dict = BaseDeltaConfig().to_dict() + + # get class specific config dict + class_config_dict = self.__class__().to_dict() #if not self.is_composition else {} + + serializable_config_dict = {} + + # only serialize values that differ from the default config + for key, value in config_dict.items(): + if ( + key not in default_config_dict + or key in checked_package_versions + or value != default_config_dict[key] + or (key in class_config_dict and value != class_config_dict[key]) + ): + serializable_config_dict[key] = value + + self.dict_torch_dtype_to_str(serializable_config_dict) + + return serializable_config_dict + + def update(self, config_dict: Dict[str, Any]): + """[NODOC] + Updates attributes of this class with attributes from ``config_dict``. + Args: + config_dict (:obj:`Dict[str, Any]`): Dictionary of attributes that should be updated for this class. + """ + for key, value in config_dict.items(): + setattr(self, key, value) + + def to_dict(self) -> Dict[str, Any]: + """ + Serializes this instance to a Python dictionary. + Returns: + :obj:`dict`: Dictionary of all the attributes that make up this configuration instance. 
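+
+        Example (illustrative):
+
+        .. code-block:: python
+
+            config_dict = delta_config.to_dict()
+            print(delta_config.to_json_string())  # the JSON form also records the package versions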
+ """ + output = copy.deepcopy(self.__dict__) + if hasattr(self.__class__, "model_type"): + output["model_type"] = self.__class__.model_type + + # Transformers version when serializing the model + output["transformers_version"] = transformers_version + output["opendelta_version"] = opendelta_version + + self.dict_torch_dtype_to_str(output) + + return output + + def dict_torch_dtype_to_str(self, d: Dict[str, Any]) -> None: + """[NODOC] + Checks whether the passed dictionary has a *torch_dtype* key and if it's not None, converts torch.dtype to a + string of just the type. For example, ``torch.float32`` get converted into *"float32"* string, which can then be + stored in the json format. + """ + if d.get("torch_dtype", None) is not None and not isinstance(d["torch_dtype"], str): + d["torch_dtype"] = str(d["torch_dtype"]).split(".")[1] + + + + +def get_configuration_file( + path_or_repo: Union[str, os.PathLike], + revision: Optional[str] = None, + use_auth_token: Optional[Union[bool, str]] = None, + local_files_only: bool = False, +) -> str: + """ + Get the configuration file to use for this version of transformers. + Args: + path_or_repo (`:obj:str` or `:obj:os.PathLike`): + Can be either the id of a repo on huggingface.co or a path to a *directory*. + revision(`:obj:str`, *optional*, defaults to ``"main"``): + The specific model version to use. It can be a branch name, a tag name, or a commit id, since we use a + git-based system for storing models and other artifacts on huggingface.co, so ``revision`` can be any + identifier allowed by git. + use_auth_token (:obj:`str` or *bool*, *optional*): + The token to use as HTTP bearer authorization for remote files. If :obj:`True`, will use the token generated + when running ``transformers-cli login`` (stored in ``~/.huggingface``). + local_files_only (:obj:`bool`, *optional*, defaults to :obj:`False`): + Whether or not to only rely on local files and not to attempt to download any files. + Returns: + :obj:`str`: The configuration file to use. + """ + # Inspect all files from the repo/folder. + all_files = get_list_of_files( + path_or_repo, revision=revision, use_auth_token=use_auth_token, local_files_only=local_files_only + ) + configuration_files_map = {} + for file_name in all_files: + search = _re_configuration_file.search(file_name) + if search is not None: + v = search.groups()[0] + configuration_files_map[v] = os.path.split(file_name)[-1] + available_versions = sorted(configuration_files_map.keys()) + # Defaults to FULL_CONFIGURATION_FILE and then try to look at some newer versions. + configuration_file = FULL_CONFIGURATION_FILE + # transformers_version_ = version.parse(transformers_version) + for v in available_versions: + # if version.parse(v) <= transformers_version_: + configuration_file = configuration_files_map[v] + # else: + # # No point going further since the versions are sorted. 
+ # break + + return configuration_file + + +if __name__ == "__main__": + myconfig = BaseDeltaConfig.from_pretrained("../ckpts/lora/") + myconfig.save_pretrained("../ckpts/lora.1/") + print(myconfig) \ No newline at end of file diff --git a/opendelta/delta_models/__init__.py b/opendelta/delta_models/__init__.py new file mode 100644 index 0000000..c57b864 --- /dev/null +++ b/opendelta/delta_models/__init__.py @@ -0,0 +1,2 @@ +from .lora import LoraModel, LoraConfig +from .bitfit import BitFitModel diff --git a/opendelta/delta_models/adapter.py b/opendelta/delta_models/adapter.py new file mode 100644 index 0000000..3017f01 --- /dev/null +++ b/opendelta/delta_models/adapter.py @@ -0,0 +1,202 @@ +from functools import partial +from random import random +from typing import Optional, Union +from opendelta.utils.signature import get_arg_names_inside_func +from opendelta.utils.name_based_addressing import * +from opendelta.utils.cuda import get_device +from opendelta.basemodel import DeltaBase +import loralib as lora +import torch.nn as nn +import torch +import math +from opendelta.delta_models.layers.activations import Activations +import inspect +from opendelta import BaseDeltaConfig +import opendelta.utils.logging as logging +logger = logging.get_logger(__name__) + +class AdapterLayer(nn.Module): + r"""A layer of adapter tuning module. + """ + layer_count = 0 + + @classmethod + def count_layer(cls): + cls.layer_count += 1 + + @classmethod + def get_layer_count(cls): + return cls.layer_count + + def __init__(self, bottleneck_dim=24, non_linearity='gelu_new', device=None): + super().__init__() + self.bottleneck_dim = bottleneck_dim + self.device = device + self.instantiated = False + self.non_linearity = non_linearity + + self.layer_id = AdapterLayer.get_layer_count() + AdapterLayer.count_layer() + + + def instantiate(self, hidden_dim): + self.modulelist = nn.Sequential() + self.modulelist.add_module("down_proj",nn.Linear(hidden_dim, self.bottleneck_dim, device=self.device)) + + # select non-linearity + self.modulelist.add_module("non_linear", Activations(self.non_linearity.lower())) + + self.modulelist.add_module("up_proj", nn.Linear(self.bottleneck_dim, self.hidden_dim, device=self.device)) + + # TODO: + # If we want to have a layer norm on output, we apply it later after a separate residual connection + # This means that we learn a new output layer norm, which replaces another layer norm learned in the bert layer + # if self.add_layer_norm_after: + # self.adapter_norm_after = nn.LayerNorm(self.input_size) + + self.instantiated = True + # initialize the weight, which is important for fast convergence and better performance. + self.apply(self._init_weight) + + def _init_weight(self, module): + if isinstance(module, nn.Linear): + module.weight.data.normal_(mean=0.0, std=0.01) + if module.bias is not None: + module.bias.data.zero_() + + + def post_forward(self, output): + r""" Get the hidden_states from the PLM's layer output, pass it into the adapter, + then combined with the main hidden_states. Finally pass it into the subsequent layer. 
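+
+        In short, the layer realizes a residual bottleneck (sketch):
+        ``hidden -> hidden + up_proj(non_linear(down_proj(hidden)))``.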
+ + """ + if isinstance(output, tuple): + hiddens = output[0] + elif isinstance(output, torch.Tensor): + hiddens = output + else: + raise TypeError + + + if not self.instantiated: + self.hidden_dim = hiddens.shape[-1] + logger.debug(f"Got hidden dim hidden_dim {self.hidden_dim}") + self.instantiate(hidden_dim=self.hidden_dim) + + + adapter_output = self.modulelist(hiddens) + modified_output = adapter_output + hiddens # TODO option: disable residual_connection + if isinstance(output, tuple): + output = (modified_output,) + output[1:] + elif isinstance(output, torch.Tensor): + output = modified_output + else: + raise TypeError + return output + + + +class AdapterConfig(BaseDeltaConfig): + r""" + This is the configuration class to store the configuration of a :py:class:`~AdapterModel` + + """ + def __init__( + self, + bottleneck_dim: Optional[int]=24, + non_linearity: Optional[str]='gelu_new', + sequential: Optional[str] = True, + **kwargs + ): + super().__init__(**kwargs) + arg_names = get_arg_names_inside_func(self.__init__) + for arg_name in arg_names: + if not hasattr(self, arg_name): # the arg has not been registered in parent config + setattr(self, arg_name, locals()[arg_name]) + + + +class AdapterModel(DeltaBase): + r""" The implementation of Adapter(`Parameter-Efficient Transfer Learning for NLP `_ ) . + Add adapter to the designated ``modified_modules``. In sequential paradigm, The modules' output is then passed into the adapter's + post_forward. + + .. note:: + We **assume** the output of the modified module is the hidden state or a tuple where hidden state is the + first element. This is true for most PLMs. However, we admit that currently it's not rigorous, We will improve + it in the next version. Currently, if you encount an error here for you backbone, you can modify the code to + get the hidden state. + + class attributes: + - default_modified_modules = ["attn", "ff"] According to the Adapter paper, we add adapter to the attention layer + and feed forward layer. + - delta_type = "adapter" + + Args: + backbone_model (:obj:`transformers.PretrainedModels`): The backbone model to be modified. + bottleneck_dim (:obj:`int`): The dimension of the adapter's bottleneck. + non_linearity (:obj:`str`): The non linearity of the adapter. + sequential (:obj:`str`): Whether insert the adapter in a sequential manner, as opposed to a parallel manner. + See `Towards a Unified View of Parameter-Efficient Transfer Learning `_ + for detail. + modified_modules (:obj:`List[str]`): For prefix tuning, the it must refer to an attention layer (Currently, only + the implemented ones) + unfrozen_modules (:obj:`List[str]`, *optional*, default to :obj:`None`): The modules that should be unfrozen + together with the prefix parameters. + common_structure (:obj:`bool`): whether using name-based addressing witha common structure mapping. 
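+
+    Example (a minimal usage sketch; the checkpoint name and the ``freeze_module`` call are
+    illustrative, assuming the usual :py:class:`DeltaBase` helpers)::
+
+        >>> from transformers import AutoModelForSeq2SeqLM
+        >>> backbone = AutoModelForSeq2SeqLM.from_pretrained("t5-base")
+        >>> delta_model = AdapterModel(backbone_model=backbone, bottleneck_dim=24)
+        >>> # keep only the inserted adapters trainable
+        >>> delta_model.freeze_module(exclude=["deltas"])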
+ + """ + config_class = AdapterConfig + delta_type = "adapter" + default_modified_modules = ["attn", "ff"] + def __init__(self, + backbone_model: nn.Module, + bottleneck_dim: Optional[int]=24, + non_linearity: Optional[str]='gelu_new', + sequential: Optional[str] = True, + modified_modules: Optional[bool] = None, + unfrozen_modules: Optional[bool] = None, + common_structure: Optional[bool] = None, + interactive_modify: Optional[Union[bool, int]] = False, + ): + DeltaBase.__init__(self, + backbone_model, + modified_modules=modified_modules, + unfrozen_modules=unfrozen_modules, + common_structure=common_structure, + interactive_modify=interactive_modify, + ) + arg_names = get_arg_names_inside_func(self.__init__) + for arg_name in arg_names: + if not hasattr(self, arg_name): # not registered in parent class + setattr(self, arg_name, locals()[arg_name]) + + self.delta_modules = nn.ModuleList() + + self.add_all_delta_to_backbone(self.backbone_model, + self.modified_modules, + ) + + + def add_all_delta_to_backbone(self, + module: nn.Module, + modified_modules: List[str], + ) -> nn.Module: + for key, _ in module.named_modules(): + if self.find_key(key, modified_modules): + self.update_module(module, key) + self._pseudo_data_to_instantiate(module) + self.mark_as_delta() + return module + + def update_module(self, module: nn.Module, key: str): + _, _, ref = self.find_module(module, key) + adapterlayer = self.new_module_like(ref) + self.insert_sequential_module(ref, delta_module=adapterlayer, name="adapter") + + def new_module_like(self, module): + module_device = get_device(module) + adapterlayer = AdapterLayer(bottleneck_dim=self.bottleneck_dim, non_linearity=self.non_linearity, device=module_device) + self.delta_modules.append(adapterlayer) + return adapterlayer + \ No newline at end of file diff --git a/opendelta/delta_models/bitfit.py b/opendelta/delta_models/bitfit.py new file mode 100644 index 0000000..9bdff02 --- /dev/null +++ b/opendelta/delta_models/bitfit.py @@ -0,0 +1,202 @@ +from typing import Optional, Union +from opendelta.utils.signature import get_arg_names_inside_func +from opendelta.utils.name_based_addressing import * +from opendelta.basemodel import DeltaBase, is_leaf_module +from transformers.models.t5 import T5ForConditionalGeneration +import loralib as lora +import torch.nn as nn + +from transformers.models.bert.modeling_bert import BertForMaskedLM +import torch +from torch.nn import init +import math +from opendelta.utils.structure_mapping import transform +from opendelta import BaseDeltaConfig +import opendelta.utils.logging as logging +logger = logging.get_logger(__name__) + + +class BitFitConfig(BaseDeltaConfig): + r""" + This is the configuration class to store the configuration of a :py:class:`~BitFitModel` + + """ + def __init__( + self, + **kwargs + ): + super().__init__(**kwargs) + arg_names = get_arg_names_inside_func(self.__init__) + for arg_name in arg_names: + if not hasattr(self, arg_name): # the arg has not been registered in parent config + setattr(self, arg_name, locals()[arg_name]) + +class BiasLayer(nn.Module): + def __init__(self, init_method="zero"): + super().__init__() + self.init_method=init_method + self.instantiated = False + + def instantiate(self, hidden_dim): + if self.init_method == "zero": + self.bias = nn.Parameter(torch.zeros(hidden_dim)) + else: + raise NotImplementedError + self.instantiated = True + + def post_forward(self, output): + r"""Presuming the first argument is the tensor to add bias along the last dimension. 
+ In most cases, it is correct. However, be aware of the possibility that the presumption + doesn't hold. + """ + if isinstance(output, tuple): + hiddens = output[0] + elif isinstance(output, torch.Tensor): + hiddens = output + else: + raise TypeError + + if not self.instantiated: + self.hidden_dim = hiddens.shape[-1] + logger.debug(f"Got hidden dim hidden_dim {self.hidden_dim}") + self.instantiate(hidden_dim=self.hidden_dim) + + modified_output = hiddens + self.bias + + if isinstance(output, tuple): + output = (modified_output,) + output[1:] + elif isinstance(output, torch.Tensor): + output = modified_output + else: + raise TypeError + return output + + + +class BitFitModel(DeltaBase): + r""" The implementation of `BitFit: Simple Parameter-efficient Fine-tuning for Transformer-based Masked Language-models `_ . + Unfreeze bias term (or add bias term if bias term is absent in the backbone, e.g. T5) to the modules of + a transformer block. + + .. note:: + + **Broadcast to Submodule**: We modify all potential positions of the specified + ``modified_modules``. That is to say, if we specify ``attn`` in the modified_modules, then all position + including the q, k, v and out linear layer of the attention layer are added bias layer (or unfreezing). + The potential position is determined according to equation (1)-(5) and the previous three + equations. + + + class attributes: + - default_modified_modules = ["attn", "ff", "layer_norm","lm_head.proj"] According to the paper and the + implementation in `Compacter's baseline `_ , we modify the + bias term in the above modules. + - delta_type = "bitfit" + + + + + Args: + backbone_model (:obj:`transformers.PretrainedModels`): The backbone model to be modified. + modified_modules (:obj:`List[str]`): For prefix tuning, the it must refer to an attention layer (Currently, only + the implemented ones) + unfrozen_modules (:obj:`List[str]`, *optional*, default to :obj:`None`): The modules that should be unfrozen + together with the prefix parameters. + common_structure (:obj:`bool`): whether using name-based addressing witha common structure mapping. + + """ + + + config_class = BitFitConfig + delta_type = "bitfit" + default_modified_modules = ["attn", "ff", "layer_norm","lm_head.proj"] # modify all the bias parameter in attention and feed-forward layer. + def __init__(self, + backbone_model: nn.Module, + modified_modules: Optional[bool] = None, + unfrozen_modules: Optional[bool] = None, + common_structure: Optional[bool] = None, + interactive_modify: Optional[Union[bool, int]] = False, + ): + DeltaBase.__init__(self, + backbone_model, + modified_modules=modified_modules, + unfrozen_modules=unfrozen_modules, + common_structure=common_structure, + interactive_modify=interactive_modify, + ) + arg_names = get_arg_names_inside_func(self.__init__) + for arg_name in arg_names: + if not hasattr(self, arg_name): # not registered in parent class + setattr(self, arg_name, locals()[arg_name]) + + self.delta_params = nn.ParameterList() + self.delta_modules = nn.ModuleList() + + self.add_all_delta_to_backbone(self.backbone_model, + self.modified_modules, + ) + + + def update_module(self, module: nn.Module, key: str): + _, _, ref = self.find_module(module, key) + self.modify_module(ref) + + + def modify_module(self, + module: nn.Module, + ): + if is_leaf_module(module): + # if it is a leaf module, add bias to it regardless of its type. + if isinstance(module, nn.Linear): + self.add_bias_to_linear(module) + else: + # for example, layer_norms, lm_heads. 
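+                    # Non-linear leaf modules (e.g. LayerNorm) get a BiasLayer hooked after their
+                    # forward pass instead of a new `bias` Parameter registered on the module itself.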
+ self.add_bias_to_others(module) + else: + # for the non-leaf modules, by default it will add bias only to the linear submodules. + for n, c in module.named_modules(): + if isinstance(c, nn.Linear): + if c.bias is None: + bias = nn.Parameter(torch.empty(c.out_features), requires_grad=True) + c.register_parameter('bias', bias) + self._reset_bias_parameters(c) + self.delta_params.append(bias) + else: + c.bias.requires_grad = True + self.delta_params.append(c.bias) + else: + pass + + def add_bias_to_linear(self, c): + if c.bias is None: + bias = nn.Parameter(torch.empty(c.out_features), requires_grad=True) + c.register_parameter('bias', bias) + self._reset_bias_parameters(c) + self.delta_params.append(bias) + else: + c.bias.requires_grad = True + self.delta_params.append(c.bias) + + def add_bias_to_others(self, c): + new_bias = BiasLayer() + self.insert_sequential_module(c, delta_module=new_bias, name="bitfit") # name shouldn't be `bias` here, since + # the name `bias` is reserved for some module such as roberta's LayerNorm. + self.delta_modules.append(new_bias) + + + + @staticmethod + def _reset_bias_parameters(linear_module): + fan_in, _ = init._calculate_fan_in_and_fan_out(linear_module.weight) + bound = 1 / math.sqrt(fan_in) if fan_in > 0 else 0 + init.uniform_(linear_module.bias, -bound, bound) + + def detach(self, module): + r"""Not implemented for BitFit yet. Please wait for the next version. + """ + raise NotImplementedError + + def attach(self, module): + r"""Not implemented for BitFit yet. Please wait for the next version. + """ + raise NotImplementedError diff --git a/opendelta/delta_models/compacter.py b/opendelta/delta_models/compacter.py new file mode 100644 index 0000000..72c287a --- /dev/null +++ b/opendelta/delta_models/compacter.py @@ -0,0 +1,303 @@ +from functools import partial +from typing import Optional, Union +from opendelta.delta_configs import BaseDeltaConfig +from opendelta.utils.signature import get_arg_names_inside_func +from opendelta.utils.name_based_addressing import * +from opendelta.utils.cuda import get_device +from opendelta.basemodel import DeltaBase +import loralib as lora +import torch.nn as nn +import torch +import math +import opendelta +from opendelta.delta_models.layers.activations import Activations +import inspect +from opendelta.delta_models.layers.hypercomplex_linear import PHMLinear +import opendelta.utils.logging as logging +logger = logging.get_logger(__name__) + +class HyperComplexAdapterLayer(nn.Module): + """Hypercomplex Adapter layer, in which the weights of up and down sampler modules + are parameters are 1/n times of the conventional adapter layers, where n is + hypercomplex division number.""" + + def __init__(self, + reduction_factor=16, + non_linearity="relu", + phm_c_init="normal", + hypercomplex_division=4, + learn_phm=True, + hypercomplex_nonlinearity="glorot-uniform", + shared_phm_rule=False, + factorized_phm=True, + phm_rule: Optional[torch.Tensor]=None, + shared_W_phm=False, + factorized_phm_rule=False, + phm_rank=1, + phm_init_range=0.0001, + kronecker_prod=None, + device=None, + use_bias_up_sampler=True, + use_bias_down_sampler=True, + ): + super().__init__() + self.reduction_factor = reduction_factor + self.non_linearity = non_linearity + self.phm_c_init = phm_c_init + self.hypercomplex_division = hypercomplex_division + self.learn_phm = learn_phm + self.phm_rule=phm_rule + self.hypercomplex_nonlinearity = hypercomplex_nonlinearity + self.shared_phm_rule = shared_phm_rule + self.factorized_phm = factorized_phm + 
self.shared_W_phm = shared_W_phm + self.factorized_phm_rule = factorized_phm_rule + self.phm_rank = phm_rank + self.phm_init_range = phm_init_range + self.kronecker_prod = kronecker_prod + self.use_bias_up_sampler=use_bias_up_sampler + self.use_bias_down_sampler=use_bias_down_sampler + self.device = device + + self.instantiated = False + + + def instantiate(self, hidden_dim): + self.down_sample_size = hidden_dim // self.reduction_factor + self.activation = Activations(self.non_linearity.lower()).to(self.device) + self.down_sampler = PHMLinear(in_features=hidden_dim, + out_features=self.down_sample_size, + bias=self.use_bias_down_sampler, + c_init=self.phm_c_init, + phm_dim=self.hypercomplex_division, + phm_rule=self.phm_rule, + learn_phm=self.learn_phm, + w_init=self.hypercomplex_nonlinearity, + shared_phm_rule=self.shared_phm_rule, + factorized_phm=self.factorized_phm, + shared_W_phm=self.shared_W_phm, + factorized_phm_rule=self.factorized_phm_rule, + phm_rank=self.phm_rank, + phm_init_range=self.phm_init_range, + kronecker_prod=self.kronecker_prod).to(self.device) + self.up_sampler = PHMLinear(in_features=self.down_sample_size, + out_features=hidden_dim, + bias=self.use_bias_up_sampler, + c_init=self.phm_c_init, + phm_dim=self.hypercomplex_division, + phm_rule=self.phm_rule, + learn_phm=self.learn_phm, + w_init=self.hypercomplex_nonlinearity, + shared_phm_rule=self.shared_phm_rule, + factorized_phm=self.factorized_phm, + shared_W_phm=self.shared_W_phm, + factorized_phm_rule=self.factorized_phm_rule, + phm_rank=self.phm_rank, + phm_init_range=self.phm_init_range, + kronecker_prod=self.kronecker_prod).to(self.device) + self.instantiated = True + + + def post_forward(self, output): + r""" Get the hidden_states from the PLM's layer output, pass it into the hypercomplex adapter, + then combined with the main hidden_states. Finally pass it into the subsequent layer. 
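+
+        The down/up samplers here are :py:class:`PHMLinear` layers, so the residual bottleneck
+        ``hidden -> hidden + up_sampler(activation(down_sampler(hidden)))`` uses
+        parameterized-hypercomplex-multiplication projections instead of dense linear layers.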
+ + """ + + if isinstance(output, tuple): + hiddens = output[0] + elif isinstance(output, torch.Tensor): + hiddens = output + else: + raise TypeError + + if not self.instantiated: + self.hidden_dim = hiddens.shape[-1] + logger.debug(f"Got hidden dim hidden_dim {self.hidden_dim}") + self.instantiate(hidden_dim=self.hidden_dim) + + + z = self.down_sampler(hiddens) + z = self.activation(z) + adapter_output = self.up_sampler(z) + + modified_output = adapter_output + hiddens # residual_connection + if isinstance(output, tuple): + output = (modified_output,) + output[1:] + elif isinstance(output, torch.Tensor): + output = modified_output + else: + raise TypeError + return output + +class CompacterConfig(BaseDeltaConfig): + r""" + This is the configuration class to store the configuration of a :py:class:`~CompacterModel` + + """ + def __init__( + self, + bottleneck_dim: Optional[int]=32, + non_linearity: Optional[str]='relu', + sequential: Optional[str] = True, + reduction_factor=16, + phm_c_init="normal", + hypercomplex_division=4, + learn_phm=True, + hypercomplex_nonlinearity="glorot-uniform", + shared_phm_rule=False, + factorized_phm=True, + shared_W_phm=False, + factorized_phm_rule=False, + phm_rank=1, + phm_init_range=0.0001, + kronecker_prod=None, + use_bias_up_sampler=True, + use_bias_down_sampler=True, + **kwargs + ): + super().__init__(**kwargs) + arg_names = get_arg_names_inside_func(self.__init__) + for arg_name in arg_names: + if not hasattr(self, arg_name): # the arg has not been registered in parent config + setattr(self, arg_name, locals()[arg_name]) + + + +class CompacterModel(DeltaBase): + r""" The implementation of `Compacter: Efficient Low-Rank Hypercomplex Adapter Layers `_ . + Add compacter layer to the designated ``modified_modules``. In sequential paradigm, The modules' output is then + passed into the compacter's post_forward. + + .. note:: + We **assume** the output of the modified module is the hidden state or a tuple where hidden state is the + first element. This is true for most PLMs. However, we admit that currently it's not rigorous, We will improve + it in the next version. Currently, if you encount an error here for you backbone, you can modify the code to + get the hidden state. + + All the hyperparameter is adopted from the `compacter code base `_ . + + class attributes: + - default_modified_modules = ["attn", "ff"] According to the compacter paper, we add compacter to the attention layer + and feed forward layer. + - delta_type = "compacter" + + Args: + backbone_model (:obj:`transformers.PretrainedModels`): The backbone model to be modified. + modified_modules (:obj:`List[str]`): For prefix tuning, the it must refer to an attention layer (Currently, only + the implemented ones) + unfrozen_modules (:obj:`List[str]`, *optional*, default to :obj:`None`): The modules that should be unfrozen + together with the prefix parameters. + common_structure (:obj:`bool`, *optional*, default to :obj:`None`): whether using name-based addressing witha common structure mapping. + reduction_factor (:obj:`int`, *optional*, default to ``16``): bottleneck_dim = hidden_dim//reduction_factor + non_linearity (:obj:`str`, *optional*, default to ``"gelu_new"``): The non linearity activation used in between the down + projecter and the up projecter. + phm_c_init (:obj:`str`, *optional*, default to ``"normal"``): The initialize method of the C in compacter. + hypercomplex_division (:obj:`str`, *optional*, default to 4): The ``n`` in the paper. 
The number of division along a dimension in compector. + learn_phm (:obj:`bool`, *optional*, default to :obj:`True` ): Whether the phm rule requires_grad. Note that we didn't check the performance of learn_phm=False. + hypercomplex_nonlinearity (:obj:`str`, *optional*, default to ``"glorot-uniform"``): The initialize method of the W in compacter. + shared_phm_rule (:obj:`str`, *optional* , default to :obj:`False`): Whether the phm rule is shared accross layer. + factorized_phm (:obj:`str`, *optional*, default to :obj:`True`): Whether to factorize the phm into low rank product. + shared_W_phm (:obj:`str`, *optional* , default to :obj:`False`): Whether the W_phm is shared accross layer. + factorized_phm_rule (:obj:`str`, *optional* , default to :obj:`False`): Whether to factorize the phm rule into low rank product. + phm_rank=1 (:obj:`int`, *optional*, default to 1): The rank of low rank decomposition of phm. + phm_init_range (:obj:`float`, *optional*, default to 0.0001): The range of phm initialization. + kronecker_prod (:obj:`bool`, *optional*, default to False): Whether to perform kronecker_prod in matvec_product, proposed by + `Parameterization of Hypercomplex Multiplications `_ + use_bias_up_sampler (:obj:`float`, *optional*, default to :obj:`True`): Whether add bias to the up projector. + Note that the bias for this is a ``hidden_dim`` vector. + use_bias_down_sampler (:obj:`float`, *optional*, default to :obj:`True`): Whether add bias to the down projector. + Note that the bias for this is a ``bottleneck_dim`` vector. + + + """ + config_class = CompacterConfig + delta_type = "compacter" + default_modified_modules = ["attn", "ff"] + def __init__(self, + backbone_model, + modified_modules: Optional[bool] = None, + unfrozen_modules: Optional[bool] = None, + common_structure: Optional[bool] = None, + interactive_modify: Optional[Union[bool, int]] = False, + reduction_factor=16, + non_linearity="gelu_new", + phm_c_init="normal", + hypercomplex_division=4, + learn_phm=True, + hypercomplex_nonlinearity="glorot-uniform", + shared_phm_rule=False, + factorized_phm=True, + shared_W_phm=False, + factorized_phm_rule=False, + phm_rank=1, + phm_init_range=0.0001, + kronecker_prod=None, + use_bias_up_sampler=True, + use_bias_down_sampler=True, + ): + DeltaBase.__init__(self, + backbone_model, + modified_modules=modified_modules, + unfrozen_modules=unfrozen_modules, + common_structure=common_structure, + interactive_modify=interactive_modify, + ) + assert shared_phm_rule == False, "In opendelta version {opendelta.__version__}, "\ + "shared_phm_rule is not supported. Later, sharing parameters will be tackled using"\ + "a unified paradigm." + assert shared_W_phm == False, "In opendelta version {opendelta.__version__}, "\ + "shared_W_phm is not supported. Later, sharing parameters will be tackled using"\ + "a unified paradigm." 
+ arg_names = get_arg_names_inside_func(self.__init__) + for arg_name in arg_names: + if not hasattr(self, arg_name): # not registered in parent class + setattr(self, arg_name, locals()[arg_name]) + + self.delta_modules = nn.ModuleList() + + self.add_all_delta_to_backbone(self.backbone_model, + self.modified_modules, + ) + + + def add_all_delta_to_backbone(self, + module: nn.Module, + modified_modules: List[str], + ) -> nn.Module: + for key, _ in module.named_modules(): + if self.find_key(key, modified_modules): + self.update_module(module, key) + self._pseudo_data_to_instantiate(module) + self.mark_as_delta() + return module + + def update_module(self, module: nn.Module, key: str): + _, _, ref = self.find_module(module, key) + adapterlayer = self.new_module_like(ref) + self.insert_sequential_module(ref, + delta_module=adapterlayer, + name="compactor") + + def new_module_like(self, module): + module_device = get_device(module) + adapterlayer = HyperComplexAdapterLayer(reduction_factor=self.reduction_factor, + non_linearity=self.non_linearity, + phm_c_init=self.phm_c_init, + hypercomplex_division=self.hypercomplex_division, + learn_phm=self.learn_phm, + hypercomplex_nonlinearity=self.hypercomplex_nonlinearity, + shared_phm_rule=self.shared_phm_rule, + factorized_phm=self.factorized_phm, + shared_W_phm=self.shared_W_phm, + factorized_phm_rule=self.factorized_phm_rule, + phm_rank=self.phm_rank, + phm_init_range=self.phm_init_range, + kronecker_prod=self.kronecker_prod, + use_bias_up_sampler=self.use_bias_up_sampler, + use_bias_down_sampler=self.use_bias_down_sampler, + device=module_device + ) + self.delta_modules.append(adapterlayer) + return adapterlayer + \ No newline at end of file diff --git a/opendelta/delta_models/layers/__init__.py b/opendelta/delta_models/layers/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/opendelta/delta_models/layers/activations.py b/opendelta/delta_models/layers/activations.py new file mode 100644 index 0000000..8ce4a16 --- /dev/null +++ b/opendelta/delta_models/layers/activations.py @@ -0,0 +1,50 @@ +import torch +import math +import torch.nn as nn + +import torch.nn as nn +from transformers.activations import get_activation + +class Activations(nn.Module): + """ + Implementation of various activation function. Copied from open-source project AdapterHub #TODO: addlink + """ + + def __init__(self, activation_type): + self.activation_type = activation_type + if activation_type.lower() == "relu": + self.f = nn.functional.relu + elif activation_type.lower() == "tanh": + self.f = torch.tanh + elif activation_type.lower() == "swish": + + def swish(x): + return x * torch.sigmoid(x) + + self.f = swish + elif activation_type.lower() == "gelu_new": + + def gelu_new(x): + """ + Implementation of the gelu activation function currently in Google Bert repo (identical to OpenAI GPT). 
+ Also see https://arxiv.org/abs/1606.08415 + """ + return 0.5 * x * (1 + torch.tanh(math.sqrt(2 / math.pi) * (x + 0.044715 * torch.pow(x, 3)))) + + self.f = gelu_new + elif activation_type.lower() == "gelu_orig": + self.f = nn.functional.gelu + elif activation_type.lower() == "leakyrelu": + self.f = nn.functional.leaky_relu + else: + self.f = get_activation(activation_type) + + super().__init__() + + def forward(self, x): + return self.f(x) + + def __repr__(self): + return self.activation_type + + diff --git a/opendelta/delta_models/layers/hypercomplex_linear.py b/opendelta/delta_models/layers/hypercomplex_linear.py new file mode 100644 index 0000000..e0ed589 --- /dev/null +++ b/opendelta/delta_models/layers/hypercomplex_linear.py @@ -0,0 +1,213 @@ +# The codes are from https://github.com/bayer-science-for-a-better-life/phc-gnn +import torch +import torch.nn as nn +from typing import Union, Optional +import torch.nn.functional as F +import torch +import math +from opendelta.delta_models.layers.init import glorot_uniform, glorot_normal + + + +# The codes are from https://github.com/bayer-science-for-a-better-life/phc-gnn + +"""A part of the pylabyk library: numpytorch.py at https://github.com/yulkang/pylabyk""" +def kronecker_product(a, b): + """ + Kronecker product of matrices a and b with leading batch dimensions. + Batch dimensions are broadcast. The number of them mush + :type a: torch.Tensor + :type b: torch.Tensor + :rtype: torch.Tensor + """ + #return torch.stack([torch.kron(ai, bi) for ai, bi in zip(a,b)], dim=0) + siz1 = torch.Size(torch.tensor(a.shape[-2:]) * torch.tensor(b.shape[-2:])) + res = a.unsqueeze(-1).unsqueeze(-3) * b.unsqueeze(-2).unsqueeze(-4) + siz0 = res.shape[:-4] + out = res.reshape(siz0 + siz1) + return out + + +def kronecker_product_einsum_batched(A: torch.Tensor, B: torch.Tensor): + """ + Batched Version of Kronecker Products + :param A: has shape (b, a, c) + :param B: has shape (b, k, p) + :return: (b, ak, cp) + """ + assert A.dim() == 3 and B.dim() == 3 + res = torch.einsum('bac,bkp->bakcp', A, B).view(A.size(0), + A.size(1)*B.size(1), + A.size(2)*B.size(2)) + return res + + + +def matvec_product(W: torch.Tensor, x: torch.Tensor, + bias: Optional[torch.Tensor], + phm_rule, #: Union[torch.Tensor], + kronecker_prod=False) -> torch.Tensor: + """ + Functional method to compute the generalized matrix-vector product based on the paper + "Parameterization of Hypercomplex Multiplications (2020)" + https://openreview.net/forum?id=rcQdycl0zyk + y = Hx + b , where W is generated through the sum of kronecker products from the Parameterlist W, i.e. 
+ W is a an order-3 tensor of size (phm_dim, in_features, out_features) + x has shape (batch_size, phm_dim*in_features) + phm_rule is an order-3 tensor of shape (phm_dim, phm_dim, phm_dim) + H = sum_{i=0}^{d} mul_rule \otimes W[i], where \otimes is the kronecker product + """ + if kronecker_prod: + H = kronecker_product(phm_rule, W).sum(0) + else: + H = kronecker_product_einsum_batched(phm_rule, W).sum(0) + + y = torch.matmul(input=x, other=H) + if bias is not None: + y += bias + return y + + +class PHMLinear(torch.nn.Module): + def __init__(self, + in_features: int, + out_features: int, + phm_dim: int, + phm_rule: Union[None, torch.Tensor] = None, + bias: bool = True, + w_init: str = "phm", + c_init: str = "random", + learn_phm: bool = True, + shared_phm_rule=False, + factorized_phm=False, + shared_W_phm=False, + factorized_phm_rule=False, + phm_rank = 1, + phm_init_range=0.0001, + kronecker_prod=False) -> None: + super(PHMLinear, self).__init__() + assert w_init in ["phm", "glorot-normal", "glorot-uniform", "normal"] + assert c_init in ["normal", "uniform"] + assert in_features % phm_dim == 0, f"Argument `in_features`={in_features} is not divisble be `phm_dim`{phm_dim}" + assert out_features % phm_dim == 0, f"Argument `out_features`={out_features} is not divisble be `phm_dim`{phm_dim}" + self.in_features = in_features + self.out_features = out_features + self.learn_phm = learn_phm + self.phm_dim = phm_dim + self._in_feats_per_axis = in_features // phm_dim + self._out_feats_per_axis = out_features // phm_dim + self.phm_rank = phm_rank + self.phm_rule = phm_rule + self.phm_init_range = phm_init_range + self.kronecker_prod=kronecker_prod + self.shared_phm_rule = shared_phm_rule + self.factorized_phm_rule = factorized_phm_rule + if not self.shared_phm_rule: + if self.factorized_phm_rule: + self.phm_rule_left = nn.Parameter(torch.FloatTensor(phm_dim, phm_dim, 1), + requires_grad=learn_phm) + self.phm_rule_right = nn.Parameter(torch.FloatTensor(phm_dim, 1, phm_dim), + requires_grad=learn_phm) + else: + self.phm_rule = nn.Parameter(torch.FloatTensor(phm_dim, phm_dim, phm_dim), + requires_grad=learn_phm) + self.bias_flag = bias + self.w_init = w_init + self.c_init = c_init + self.shared_W_phm = shared_W_phm + self.factorized_phm = factorized_phm + if not self.shared_W_phm: + if self.factorized_phm: + self.W_left = nn.Parameter(torch.Tensor(size=(phm_dim, self._in_feats_per_axis, self.phm_rank)), + requires_grad=True) + self.W_right = nn.Parameter(torch.Tensor(size=(phm_dim, self.phm_rank, self._out_feats_per_axis)), + requires_grad=True) + else: + self.W = nn.Parameter(torch.Tensor(size=(phm_dim, self._in_feats_per_axis, self._out_feats_per_axis)), + requires_grad=True) + if self.bias_flag: + self.b = nn.Parameter(torch.Tensor(out_features)) + else: + self.register_parameter("b", None) + self.reset_parameters() + + def init_W(self): + if self.w_init == "glorot-normal": + if self.factorized_phm: + for i in range(self.phm_dim): + self.W_left.data[i] = glorot_normal(self.W_left.data[i]) + self.W_right.data[i] = glorot_normal(self.W_right.data[i]) + else: + for i in range(self.phm_dim): + self.W.data[i] = glorot_normal(self.W.data[i]) + elif self.w_init == "glorot-uniform": + if self.factorized_phm: + for i in range(self.phm_dim): + self.W_left.data[i] = glorot_uniform(self.W_left.data[i]) + self.W_right.data[i] = glorot_uniform(self.W_right.data[i]) + else: + for i in range(self.phm_dim): + self.W.data[i] = glorot_uniform(self.W.data[i]) + elif self.w_init == "normal": + if self.factorized_phm: + 
for i in range(self.phm_dim): + self.W_left.data[i].normal_(mean=0, std=self.phm_init_range) + self.W_right.data[i].normal_(mean=0, std=self.phm_init_range) + else: + for i in range(self.phm_dim): + self.W.data[i].normal_(mean=0, std=self.phm_init_range) + else: + raise ValueError + + def reset_parameters(self): + if not self.shared_W_phm: + self.init_W() + + if self.bias_flag: + self.b.data = torch.zeros_like(self.b.data) + + if not self.shared_phm_rule: + if self.factorized_phm_rule: + if self.c_init == "uniform": + self.phm_rule_left.data.uniform_(-0.01, 0.01) + self.phm_rule_right.data.uniform_(-0.01, 0.01) + elif self.c_init == "normal": + self.phm_rule_left.data.normal_(std=0.01) + self.phm_rule_right.data.normal_(std=0.01) + else: + raise NotImplementedError + else: + if self.c_init == "uniform": + self.phm_rule.data.uniform_(-0.01, 0.01) + elif self.c_init == "normal": + self.phm_rule.data.normal_(mean=0, std=0.01) + else: + raise NotImplementedError + + def set_phm_rule(self, phm_rule=None, phm_rule_left=None, phm_rule_right=None): + """If factorized_phm_rules is set, phm_rule is a tuple, showing the left and right + phm rules, and if this is not set, this is showing the phm_rule.""" + if self.factorized_phm_rule: + self.phm_rule_left = phm_rule_left + self.phm_rule_right = phm_rule_right + else: + self.phm_rule = phm_rule + + def set_W(self, W=None, W_left=None, W_right=None): + if self.factorized_phm: + self.W_left = W_left + self.W_right = W_right + else: + self.W = W + + def forward(self, x: torch.Tensor, phm_rule: Union[None, nn.ParameterList] = None) -> torch.Tensor: + if self.factorized_phm: + W = torch.bmm(self.W_left, self.W_right) + if self.factorized_phm_rule: + phm_rule = torch.bmm(self.phm_rule_left, self.phm_rule_right) + return matvec_product( + W=W if self.factorized_phm else self.W, + x=x, + bias=self.b, + phm_rule=phm_rule if self.factorized_phm_rule else self.phm_rule, + kronecker_prod=self.kronecker_prod) \ No newline at end of file diff --git a/opendelta/delta_models/layers/init.py b/opendelta/delta_models/layers/init.py new file mode 100644 index 0000000..98a03e3 --- /dev/null +++ b/opendelta/delta_models/layers/init.py @@ -0,0 +1,8 @@ +import torch +import math + +def glorot_normal(tensor: torch.Tensor): + return torch.nn.init.xavier_normal_(tensor, gain=math.sqrt(2)) + +def glorot_uniform(tensor: torch.Tensor): + return torch.nn.init.xavier_uniform_(tensor, gain=math.sqrt(2)) diff --git a/opendelta/delta_models/layers/low_rank_linear.py b/opendelta/delta_models/layers/low_rank_linear.py new file mode 100644 index 0000000..61ab92d --- /dev/null +++ b/opendelta/delta_models/layers/low_rank_linear.py @@ -0,0 +1,39 @@ +"""This script implements a low-rank linear layer.""" +import torch +import torch.nn as nn + +from opendelta.delta_models.layers.init import glorot_uniform, glorot_normal + +class LowRankLinear(torch.nn.Module): + def __init__(self, input_dim: int, output_dim: int, rank: int = 1, + bias: bool = True, w_init: str = "glorot-uniform"): + super(LowRankLinear, self).__init__() + self.input_dim = input_dim + self.output_dim = output_dim + self.rank = rank + self.bias = bias + self.w_init = w_init + self.W_left = nn.Parameter(torch.Tensor(size=(input_dim, rank)), requires_grad=True) + self.W_right = nn.Parameter(torch.Tensor(size=(rank, output_dim)), requires_grad=True) + if bias: + self.b = nn.Parameter(torch.Tensor(output_dim)) + self.reset_parameters() + + def reset_parameters(self): + if self.bias: + self.b.data = torch.zeros_like(self.b.data) + if 
self.w_init == "glorot-uniform": + self.W_left.data = glorot_uniform(self.W_left.data) + self.W_right.data = glorot_uniform(self.W_right.data) + elif self.w_init == "glorot-normal": + self.W_left.data = glorot_normal(self.W_left.data) + self.W_right.data = glorot_normal(self.W_right.data) + else: + raise ValueError + + def forward(self, x: torch.Tensor) -> torch.Tensor: + W = self.W_left*self.W_right + output = torch.matmul(input=x, other=W) + if self.bias: + output += self.b + return output diff --git a/opendelta/delta_models/lora.py b/opendelta/delta_models/lora.py new file mode 100644 index 0000000..05af87e --- /dev/null +++ b/opendelta/delta_models/lora.py @@ -0,0 +1,127 @@ +from typing import Optional, Union + +from opendelta.utils.signature import get_arg_names, get_arg_names_inside_func +from opendelta.utils.name_based_addressing import * +from opendelta.basemodel import DeltaBase +from transformers.models.t5 import T5ForConditionalGeneration +import loralib as lora +import torch.nn as nn +from opendelta import BaseDeltaConfig + +class LoraConfig(BaseDeltaConfig): + r""" + This is the configuration class to store the configuration of a :py:class:`~LoraModel` + + """ + def __init__( + self, + lora_r=8, + lora_alpha=16, + lora_dropout=0.0, + **kwargs + ): + super().__init__(**kwargs) + arg_names = get_arg_names_inside_func(self.__init__) + for arg_name in arg_names: + if not hasattr(self, arg_name): # the arg has not been registered in parent config + setattr(self, arg_name, locals()[arg_name]) + + + +class LoraModel(DeltaBase): + r""" The implementation of `LoRA: Low-Rank Adaptation of Large Language Models `_ . + Thanks for their `loralib `_, we use loralib.linear + to replace the linear layer of the backbone model. + + class attributes: + - default_modified_modules = ['attn.q', 'attn.v'] According to the paper, they modify q and v matrix in the + attention layer. However, other linears can also be modified, and may lead to better performance. + + .. note:: + modified_modules should point to linear layer. We currently don't support broadcast to all linears in + a module's child modules. + + - delta_type = "lora" + + + Args: + backbone_model (:obj:`transformers.PretrainedModels`): The backbone model to be modified. + lora_r (:obj:`int`, *optional*): the rank of the lora parameters. The smaller lora_r is , the fewer parameters lora has. + lora_alpha (:obj:`bool`, *optional*): A hyper-parameter to control the init scale of loralib.linear . + lora_dropout (:obj:`bool`, *optional*): The dropout rate in lora.linear. + modified_modules (:obj:`List[str]`): For prefix tuning, the it must refer to an attention layer (Currently, only + the implemented ones) + unfrozen_modules (:obj:`List[str]`, *optional*, default to :obj:`None`): The modules that should be unfrozen + together with the prefix parameters. + common_structure (:obj:`bool`): whether using name-based addressing witha common structure mapping. 
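+
+    Example (a minimal usage sketch; the checkpoint name and the ``freeze_module`` call are
+    illustrative, assuming the usual :py:class:`DeltaBase` helpers)::
+
+        >>> from transformers import AutoModelForSequenceClassification
+        >>> backbone = AutoModelForSequenceClassification.from_pretrained("roberta-base")
+        >>> delta_model = LoraModel(backbone_model=backbone, lora_r=8, lora_alpha=16)
+        >>> # freeze the backbone so that only the lora_A / lora_B matrices carry gradients
+        >>> delta_model.freeze_module(exclude=["deltas"])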
+ + """ + + config_class = LoraConfig + delta_type = "lora" + default_modified_modules = ['attn.q', 'attn.v'] + def __init__(self, + backbone_model: nn.Module, + lora_r=8, + lora_alpha=16, + lora_dropout=0.0, + modified_modules: Optional[bool] = None, + unfrozen_modules: Optional[bool] = None, + common_structure: Optional[bool] = None, + interactive_modify: Optional[Union[bool, int]] = False, + ): + DeltaBase.__init__(self, + backbone_model, + modified_modules=modified_modules, + unfrozen_modules=unfrozen_modules, + common_structure=common_structure, + interactive_modify=interactive_modify, + ) + arg_names = get_arg_names_inside_func(self.__init__) + for arg_name in arg_names: + if not hasattr(self, arg_name): # not registered in parent class + setattr(self, arg_name, locals()[arg_name]) + + self.delta_modules = nn.ModuleList() + + self.add_all_delta_to_backbone(self.backbone_model, + self.modified_modules, + ) + + + + def update_module(self, module: nn.Module, key: str): + parent_ref, child_name, child_ref = self.find_module(module, key) + new_module = self.new_module_like(child_module=child_ref) + self.replace_module(parent_ref, child_name, child_ref, new_module, delta_name="lora") + + def _pseudo_data_to_instantiate(self, module): + # no need to pass pseudo input, so overwrite it + pass + + def new_module_like(self, child_module): + if isinstance(child_module, nn.Linear): + in_features, out_features = child_module.in_features, child_module.out_features + new_module = lora.Linear(in_features=in_features, + out_features=out_features, + r=self.lora_r, + lora_alpha=self.lora_alpha, + lora_dropout=self.lora_dropout) + new_module.weight = child_module.weight + new_module.bias = child_module.bias # if bias is None, also copy + else: + raise NotImplementedError + return new_module + + + + def mark_as_delta(self, module: nn.Module = None): + if module is None: + module=self + for n, p in module.named_parameters(): + param_name = n.split(".")[-1] + if "lora_A" in param_name or "lora_B" in param_name: # only lora_A, lora_B is the delta parameter. 
+ setattr(p, "_is_delta", True) + + + \ No newline at end of file diff --git a/opendelta/delta_models/low_rank_adapter.py b/opendelta/delta_models/low_rank_adapter.py new file mode 100644 index 0000000..b02fdb9 --- /dev/null +++ b/opendelta/delta_models/low_rank_adapter.py @@ -0,0 +1,208 @@ + +from opendelta.basemodel import DeltaBase +from opendelta.delta_configs import BaseDeltaConfig +from opendelta.delta_models.layers.low_rank_linear import LowRankLinear +from opendelta.delta_models.layers.activations import Activations +from typing import Optional, Union +from opendelta.utils.signature import get_arg_names_inside_func +import torch.nn as nn +import torch +from functools import partial +from typing import Optional +from opendelta.utils.name_based_addressing import * +from opendelta.utils.cuda import get_device +from opendelta.basemodel import DeltaBase +import loralib as lora +import torch.nn as nn +import torch +import math +import opendelta.utils.logging as logging +logger = logging.get_logger(__name__) + + +class LowRankAdapterConfig(BaseDeltaConfig): + r""" + This is the configuration class to store the configuration of a :py:class:`~LowRankAdapterModel` + + """ + def __init__( + self, + reduction_factor=32, + non_linearity="gelu_new", + low_rank_w_init="glorot-uniform", + low_rank_rank=1, + **kwargs + ): + super().__init__(**kwargs) + arg_names = get_arg_names_inside_func(self.__init__) + for arg_name in arg_names: + if not hasattr(self, arg_name): # the arg has not been registered in parent config + setattr(self, arg_name, locals()[arg_name]) + + + +class LowRankAdapter(nn.Module): + """This is the low-rank adapter, in which each adapter is composed of two rank-one matrices. + """ + def __init__(self, + reduction_factor=32, + non_linearity="gelu_new", + low_rank_w_init="glorot-uniform", + low_rank_rank=1, + device=None): + super().__init__() + self.reduction_factor = reduction_factor + self.non_linearity = non_linearity + self.low_rank_w_init = low_rank_w_init + self.low_rank_rank = low_rank_rank + self.device = device + self.instantiated = False + + + def instantiate(self, hidden_dim): + + self.down_sample_size = hidden_dim // self.reduction_factor + self.activation = Activations(self.non_linearity.lower()).to(self.device) + self.down_sampler = LowRankLinear(hidden_dim, self.down_sample_size, + w_init=self.low_rank_w_init, + rank=self.low_rank_rank).to(self.device) + self.up_sampler = LowRankLinear(self.down_sample_size, hidden_dim, + w_init=self.low_rank_w_init, + rank=self.low_rank_rank).to(self.device) + + self.instantiated = True + + def post_forward(self, output): + r""" Get the hidden_states from the PLM's layer output, pass it into the low-rank adapter, + then combined with the main hidden_states. Finally pass it into the subsequent layer. 
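+
+        Each sampler here is a :py:class:`LowRankLinear` whose weight is built from two low-rank
+        (rank-one by default) factors ``W_left`` and ``W_right``, and the result is combined with the
+        input through the same residual connection as an ordinary adapter.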
+ + """ + + if isinstance(output, tuple): + hiddens = output[0] + elif isinstance(output, torch.Tensor): + hiddens = output + else: + raise TypeError + + if not self.instantiated: + self.hidden_dim = hiddens.shape[-1] + logger.debug(f"Got hidden dim hidden_dim {self.hidden_dim}") + self.instantiate(hidden_dim=self.hidden_dim) + + + z = self.down_sampler(hiddens) + z = self.activation(z) + adapter_output = self.up_sampler(z) + + modified_output = adapter_output + hiddens # residual_connection + if isinstance(output, tuple): + output = (modified_output,) + output[1:] + elif isinstance(output, torch.Tensor): + output = modified_output + else: + raise TypeError + return output + + + + + + +class LowRankAdapterModel(DeltaBase): + r""" The implementation of LowRankAdapter, proposed as a baseline in + `Compacter: Efficient Low-Rank Hypercomplex Adapter Layers `_ . + We found that it enjoys very few parameters but competitive performance, thus add it into OpenDelta. + Low Rank Adapter parameterize each adapter’s weight as a product of two rank-one(low) weights. + + Add lowrank adapter layer to the designated ``modified_modules``. In sequential paradigm, The modules' output is then + passed into the low rank adapter's post_forward. + + .. note:: + We **assume** the output of the modified module is the hidden state or a tuple where hidden state is the + first element. This is true for most PLMs. However, we admit that currently it's not rigorous, We will improve + it in the next version. Currently, if you encount an error here for you backbone, you can modify the code to + get the hidden state. + + All the hyperparameter is adopted from the `compacter code base `_ . + + class attributes: + - default_modified_modules = ["attn", "ff"] According to the compacter paper, we add low rank adapter to the attention layer + and feed forward layer. + - delta_type = "lowrankadapter" + + Args: + backbone_model (:obj:`transformers.PretrainedModels`): The backbone model to be modified. + reduction_factor (:obj:`int`, *optional*, default to ``16``): bottleneck_dim = hidden_dim//reduction_factor + non_linearity (:obj:`str`, *optional*, default to ``"gelu_new"``): The non linearity activation used in between the down + projecter and the up projecter. + low_rank_w_init (:obj:`str`, *optional*, default to ``"glorot-uniform"``): The weight init method of the factorized + linear weight. + low_rank_rank (:obj:`int`, *optional*, default to 1): The rank of the low-rank decomposition. + modified_modules (:obj:`List[str]`): For prefix tuning, the it must refer to an attention layer (Currently, only + the implemented ones) + unfrozen_modules (:obj:`List[str]`, *optional*, default to :obj:`None`): The modules that should be unfrozen + together with the prefix parameters. + common_structure (:obj:`bool`, *optional*, default to :obj:`None`): whether using name-based addressing witha common structure mapping. 
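+
+    Example (a minimal usage sketch; the checkpoint name is only illustrative)::
+
+        >>> from transformers import AutoModelForMaskedLM
+        >>> backbone = AutoModelForMaskedLM.from_pretrained("bert-base-uncased")
+        >>> delta_model = LowRankAdapterModel(backbone_model=backbone, reduction_factor=32)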
+ + """ + + config_class = LowRankAdapterConfig + delta_type = "lowrankadapter" + default_modified_modules = ['attn', 'ff'] + def __init__(self, + backbone_model: nn.Module, + reduction_factor = 32, + non_linearity = "gelu_new", + low_rank_w_init = "glorot-uniform", + low_rank_rank = 1, + modified_modules: Optional[bool] = None, + unfrozen_modules: Optional[bool] = None, + common_structure: Optional[bool] = None, + interactive_modify: Optional[Union[bool, int]] = False, + ): + DeltaBase.__init__(self, + backbone_model, + modified_modules=modified_modules, + unfrozen_modules=unfrozen_modules, + common_structure=common_structure, + interactive_modify=interactive_modify, + ) + arg_names = get_arg_names_inside_func(self.__init__) + for arg_name in arg_names: + if not hasattr(self, arg_name): # not registered in parent class + setattr(self, arg_name, locals()[arg_name]) + + self.delta_modules = nn.ModuleList() + + self.add_all_delta_to_backbone(self.backbone_model, + self.modified_modules, + ) + + + def add_all_delta_to_backbone(self, + module: nn.Module, + modified_modules: List[str], + ) -> nn.Module: + for key, _ in module.named_modules(): + if self.find_key(key, modified_modules): + self.update_module(module, key) + self._pseudo_data_to_instantiate(module) + self.mark_as_delta() + return module + + def update_module(self, module: nn.Module, key: str): + _, _, ref = self.find_module(module, key) + adapterlayer = self.new_module_like(ref) + self.insert_sequential_module(ref, delta_module=adapterlayer, name="low_rank_adapter") + + def new_module_like(self, module): + module_device = get_device(module) + adapterlayer = LowRankAdapter(reduction_factor = self.reduction_factor, + non_linearity = self.non_linearity, + low_rank_w_init = self.low_rank_w_init, + low_rank_rank = self.low_rank_rank, + device=module_device) + self.delta_modules.append(adapterlayer) + return adapterlayer + \ No newline at end of file diff --git a/opendelta/delta_models/prefix.py b/opendelta/delta_models/prefix.py new file mode 100644 index 0000000..debfd8e --- /dev/null +++ b/opendelta/delta_models/prefix.py @@ -0,0 +1,567 @@ +from functools import partial +from opendelta.delta_configs import BaseDeltaConfig +from opendelta.utils.signature import get_arg_names_inside_func, signature +from typing import Optional, Union +from transformers.models.distilbert.modeling_distilbert import MultiHeadSelfAttention +from transformers.models.t5.modeling_t5 import T5Attention, T5LayerSelfAttention +from transformers.models.bert.modeling_bert import BertSelfAttention +from transformers.models.gpt2.modeling_gpt2 import GPT2Attention +from transformers.models.bart.modeling_bart import BartAttention +from transformers.models.roberta.modeling_roberta import RobertaAttention +from opendelta.utils.name_based_addressing import * +from opendelta.utils.cuda import get_device +from opendelta.basemodel import DeltaBase +from transformers.models.t5 import T5ForConditionalGeneration +import loralib as lora +import torch.nn as nn +import torch +import opendelta.utils.logging as logging +logger = logging.get_logger(__name__) + + +class PrefixLayerT5(nn.Module): + r"""A layer of prefix tuning module. The layer's forward function pass (or concatenate) the additional past_key_value + into the original attention layer's forward function. 
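+
+    Concretely, ``pre_forward`` reshapes the learned ``past_key``/``past_value`` of shape
+    ``(prefix_token_num, hidden_dim)`` into ``(num_heads, prefix_token_num, hidden_dim // num_heads)``,
+    expands them over the batch dimension, hands them to ``T5Attention`` as ``past_key_value``, and
+    prepends zeros to the additive attention mask so the prefix positions stay attendable; the cached
+    ``position_bias`` is reset whenever its length no longer matches the prefixed sequence.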
+ """ + def __init__(self, prefix_token_num, num_heads, device,): + super().__init__() + self.prefix_token_num = prefix_token_num + self.num_heads = num_heads + self.device = device + self.instantiated = False + + def instantiate(self, hidden_dim): + self.past_key = nn.Parameter(torch.randn(self.prefix_token_num, hidden_dim, device=self.device), requires_grad=True) + self.past_value = nn.Parameter(torch.randn(self.prefix_token_num, hidden_dim, device=self.device), requires_grad=True) + self.past_key_reparam = None + self.past_value_reparam = None + self.instantiated = True + + + def pre_forward(self, *args, **kwargs): + r"""The args and kwargs are inherited from the T5Attention's forward function. + """ + batch_size = args[0].shape[0] + seq_len = args[0].shape[-2] + if not self.instantiated: + self.hidden_dim = args[0].shape[-1] + self.instantiate(hidden_dim=self.hidden_dim) + if self.past_key_reparam is None: + past_key = self.past_key.data + else: + past_key = self.past_key_reparam + if self.past_value_reparam is None: + past_value = self.past_value.data + else: + past_value = self.past_value_reparam + + + def expand_batchsize(x): + x = x.reshape(self.prefix_token_num, self.num_heads, -1).transpose(0,1) + x = x.unsqueeze(0).expand(batch_size, *x.shape) + return x + + if 'position_bias' in kwargs and kwargs['position_bias'] is not None: + if kwargs['position_bias'].shape[-1] != seq_len + self.prefix_token_num: # Then the position_bias should be re-calculated + kwargs['position_bias'] = None + if kwargs['past_key_value'] is None: + kwargs['past_key_value'] = (expand_batchsize(past_key), expand_batchsize(past_value)) + + past_key_len = kwargs['past_key_value'][0].shape[-2] + + if 'mask' in kwargs and kwargs['mask'] is not None: + mask_len = kwargs['mask'].shape[-1] + if past_key_len + seq_len == mask_len + self.prefix_token_num: + + am = kwargs['mask'] # Should check the format of the attention_mask when moving to a new plm. + kwargs['mask'] = torch.cat([-torch.zeros((*am.shape[:-1],self.prefix_token_num), dtype = am.dtype,device=am.device), am], dim=-1) + return args, kwargs + + def post_forward(self, output): + r""" Remove the cached positional bias, since the next layer may not have prefix token. + """ + output = output[:2] + (None, )+ output[3:] + return output + +class PrefixLayerBart(nn.Module): + r"""A layer of prefix tuning module. The layer's forward function pass (or concatenate) the additional past_key_value + into the original attention layer's forward function. + """ + def __init__(self, prefix_token_num, num_heads, device,): + super().__init__() + self.prefix_token_num = prefix_token_num + self.num_heads = num_heads + self.device = device + self.instantiated = False + + def instantiate(self, hidden_dim): + self.past_key = nn.Parameter(torch.randn(self.prefix_token_num, hidden_dim, device=self.device), requires_grad=True) + self.past_value = nn.Parameter(torch.randn(self.prefix_token_num, hidden_dim, device=self.device), requires_grad=True) + self.past_key_reparam = None + self.past_value_reparam = None + self.instantiated = True + + + def pre_forward(self, *args, **kwargs): + r"""The args and kwargs are inherited from the T5Attention's forward function. 
+ """ + + batch_size = kwargs['hidden_states'].shape[0] + if not self.instantiated: + self.hidden_dim = kwargs['hidden_states'].shape[-1] + self.instantiate(hidden_dim=self.hidden_dim) + if self.past_key_reparam is None: + past_key = self.past_key.data + else: + past_key = self.past_key_reparam + if self.past_value_reparam is None: + past_value = self.past_value.data + else: + past_value = self.past_value_reparam + + # from IPython import embed + # embed() + def expand_batchsize(x): + x = x.reshape(self.prefix_token_num, self.num_heads, -1).transpose(0,1) + x = x.unsqueeze(0).expand(batch_size, *x.shape) + return x + # from IPython import embe + + if 'past_key_value' not in kwargs or kwargs['past_key_value'] is None: + kwargs['past_key_value'] = (expand_batchsize(past_key), expand_batchsize(past_value)) + + if 'attention_mask' in kwargs and kwargs['attention_mask'] is not None: + am = kwargs['attention_mask'] # Should check the format of the attention_mask when moving to a new plm. + kwargs['attention_mask'] = torch.cat([-torch.zeros((*am.shape[:-1],self.prefix_token_num), dtype = am.dtype,device=am.device), am], dim=-1) + return args, kwargs + + +class PrefixLayerGPT2(nn.Module): + r"""A layer of prefix tuning module. The layer's forward function pass (or concatenate) the additional past_key_value + into the original attention layer's forward function. + """ + def __init__(self, prefix_token_num, num_heads, device,): + super().__init__() + self.prefix_token_num = prefix_token_num + self.num_heads = num_heads + self.device = device + self.instantiated = False + + def instantiate(self, hidden_dim): + self.past_key = nn.Parameter(torch.randn(self.prefix_token_num, hidden_dim, device=self.device), requires_grad=True) + self.past_value = nn.Parameter(torch.randn(self.prefix_token_num, hidden_dim, device=self.device), requires_grad=True) + self.past_key_reparam = None + self.past_value_reparam = None + self.instantiated = True + + + def pre_forward(self, *args, **kwargs): + r"""The args and kwargs are inherited from the T5Attention's forward function. + """ + batch_size = args[0].shape[0] + if not self.instantiated: + self.hidden_dim = args[0].shape[-1] + self.instantiate(hidden_dim=self.hidden_dim) + if self.past_key_reparam is None: + past_key = self.past_key.data + else: + past_key = self.past_key_reparam + if self.past_value_reparam is None: + past_value = self.past_value.data + else: + past_value = self.past_value_reparam + + def expand_batchsize(x): + x = x.reshape(self.prefix_token_num, self.num_heads, -1).transpose(0,1) + x = x.unsqueeze(0).expand(batch_size, *x.shape) + return x + + + if kwargs['layer_past'] is None: + kwargs['layer_past'] = (expand_batchsize(past_key), expand_batchsize(past_value)) + if 'attention_mask' in kwargs and kwargs['attention_mask'] is not None: + am = kwargs['attention_mask'] # Should check the format of the attention_mask when moving to a new plm. 
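+            # Prepend zeros for the prefix positions: GPT-2's attention mask is additive,
+            # so zero entries leave the learned prefix tokens fully attendable.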
+ kwargs['attention_mask'] = torch.cat([-torch.zeros((*am.shape[:-1],self.prefix_token_num), dtype = am.dtype,device=am.device), am], dim=-1) + return args, kwargs + + + +class PrefixLayerDistilBert(nn.Module): + # TODO: Warning: have bugs + def __init__(self, prefix_token_num, device,): + super().__init__() + self.prefix_token_num = prefix_token_num + self.device = device + self.key_instantiated = False + self.value_instantiated = False + + def forward(self, *args, **kwargs): + mask = kwargs["mask"] + key, value = kwargs['key'], kwargs['value'] + prefix_mask = torch.ones(mask.shape[0], self.prefix_token_num, dtype=mask.dtype, device=mask.device) + concated_mask = torch.cat([prefix_mask, mask], dim=1) + pseudo_prefix = torch.zeros(key.shape[0], self.prefix_token_num, key.shape[2], dtype=key.dtype, device=key.device) + concated_key = torch.cat([pseudo_prefix, key], dim=1) + concated_value = torch.cat([pseudo_prefix, value], dim=1) + kwargs["mask"] = concated_mask + kwargs['key'] = concated_key + kwargs['value'] = concated_value + return args, kwargs + + + def key_instantiate(self, hidden_dim): + self.past_key = nn.Parameter(torch.randn(self.prefix_token_num, hidden_dim, device=self.device), requires_grad=True) + self.past_key_reparam = None + self.key_instantiated = True + + def value_instantiate(self, hidden_dim): + self.past_value = nn.Parameter(torch.randn(self.prefix_token_num, hidden_dim, device=self.device), requires_grad=True) + self.past_value_reparam = None + self.value_instantiated = True + + def key_pre_forward(self, *args, **kwargs): + _input = args[0] + _input = _input[:,self.prefix_token_num:, :] + args = (_input,) +args[1:] + return args, kwargs + + def value_pre_forward(self, *args, **kwargs): + _input = args[0] + _input = _input[:,self.prefix_token_num:, :] + args = (_input,) +args[1:] + return args, kwargs + + def key_forward(self, output: torch.Tensor): ### Check whether run prefix is ok, 12.21 + if isinstance(output, torch.Tensor): + hiddens = output + else: + raise TypeError + if not self.key_instantiated: + self.hidden_dim = hiddens.shape[-1] + logger.debug(f"Got key hidden dim hidden_dim {self.hidden_dim}") + self.key_instantiate(hidden_dim=self.hidden_dim) + batch_size = hiddens.shape[0] + if self.past_key_reparam is None: + past_key = self.past_key.data + else: + past_key = self.past_key_reparam + output = torch.cat([past_key.unsqueeze(0).expand(batch_size, *past_key.shape), hiddens], dim=1) + return output + + def value_forward(self, output: torch.Tensor): ### Check whether run prefix is ok, 12.21 + if isinstance(output, torch.Tensor): + hiddens = output + else: + raise TypeError + if not self.value_instantiated: + self.hidden_dim = hiddens.shape[-1] + logger.debug(f"Got value hidden dim hidden_dim {self.hidden_dim}") + self.value_instantiate(hidden_dim=self.hidden_dim) + batch_size = hiddens.shape[0] + if self.past_value_reparam is None: + past_value = self.past_value.data + else: + past_value = self.past_value_reparam + output = torch.cat([past_value.unsqueeze(0).expand(batch_size, *past_value.shape), hiddens], dim=1) + return output + + +class PrefixLayerRoberta(nn.Module): + r"""A layer of prefix tuning module. The layer's forward function pass (or concatenate) the additional past_key_value + into the original attention layer's forward function. 
+ """ + def __init__(self, prefix_token_num, num_heads, device,): + super().__init__() + self.prefix_token_num = prefix_token_num + self.num_heads = num_heads + self.device = device + self.instantiated = False + + def instantiate(self, hidden_dim): + self.past_key = nn.Parameter(torch.randn(self.prefix_token_num, hidden_dim, device=self.device), requires_grad=True) + self.past_value = nn.Parameter(torch.randn(self.prefix_token_num, hidden_dim, device=self.device), requires_grad=True) + self.past_key_reparam = None + self.past_value_reparam = None + self.instantiated = True + + + def pre_forward(self, *args, **kwargs): + r"""The args and kwargs are inherited from the T5Attention's forward function. + """ + batch_size = args[0].shape[0] + if not self.instantiated: + self.hidden_dim = args[0].shape[-1] + self.instantiate(hidden_dim=self.hidden_dim) + if self.past_key_reparam is None: + past_key = self.past_key.data + else: + past_key = self.past_key_reparam + if self.past_value_reparam is None: + past_value = self.past_value.data + else: + past_value = self.past_value_reparam + + # from IPython import embed + # embed() + def expand_batchsize(x): + x = x.reshape(self.prefix_token_num, self.num_heads, -1).transpose(0,1) + x = x.unsqueeze(0).expand(batch_size, *x.shape) + return x + # from IPython import embe + + if 'past_key_value' not in kwargs or kwargs['past_key_value'] is None: + kwargs['past_key_value'] = (expand_batchsize(past_key), expand_batchsize(past_value)) + + if 'attention_mask' in kwargs and kwargs['attention_mask'] is not None: + am = kwargs['attention_mask'] # Should check the format of the attention_mask when moving to a new plm. + kwargs['attention_mask'] = torch.cat([-torch.zeros((*am.shape[:-1],self.prefix_token_num), dtype = am.dtype,device=am.device), am], dim=-1) + elif len(args) >1: # attention mask is passed via positional argument + am = args[1] + am = torch.cat([-torch.zeros((*am.shape[:-1],self.prefix_token_num), dtype = am.dtype,device=am.device), am], dim=-1) + args = (args[0], am) + args[2:] + # from IPython import embed + # embed(header = "Herein prefixroberta") + return args, kwargs + + + + # def post_forward(self, output): + # r""" Remove the cached positional bias, since the next layer may not have prefix token. + # """ + # output = output[:2] + (None, )+ output[3:] + # return output + + +class ReparameterizeFunction(nn.Module): + r""" Prefix Tuning's performance is better with a reparameterize module, which generates + the ``past_key_value`` using an MLP instead of directly optimizing the ``past_key_value`` as leaf variable. + In our implementation, the reparameterize module is constructed according to the number of parameters + in all ``past_key_value``s. Thus, variable number of prefixlayer is supported (not restricting to being equal + to the number of layers of the pretraind language model) + + + """ + def __init__(self, prefix_token_num, embed_dim, dropout_rate=0.0, mid_dim=512, module_list=[]): + super().__init__() + self.prefix_token_num = prefix_token_num + self.embed_dim = embed_dim + self.mid_dim = mid_dim + self.module_list = module_list + self.dropout = nn.Dropout(dropout_rate) + self.record_parameters() + self.compatibility_check() + self.define_reparameterization_network() + + def record_parameters(self): + r""" Enumerate the parameters that need to be reparameterized. + Then, delete the original parameters. 
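+        The deleted parameters are re-generated on every forward pass by ``allocate_parameter``,
+        which writes the outputs of the reparameterization MLP into each prefix layer's
+        ``past_key_reparam`` and ``past_value_reparam``.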
+        """
+        tot = 0
+        for module in self.module_list:
+            for n, parameters in module.named_parameters():
+                tot += parameters.numel()
+                module.register_parameter(n, None)
+        self.total_parameters_num = tot
+
+    def compatibility_check(self,):
+        r"""May be removed.
+        """
+        assert self.total_parameters_num % self.prefix_token_num == 0
+
+    def allocate_parameter(self):
+        r""" At the beginning of each forward pass through the whole network (PLM),
+        calculate the reparameterized past_key and past_value (``past_key_reparam`` and ``past_value_reparam``)
+        for later use in each layer.
+        """
+        input_tokens = self.input_tokens
+        temp_control = self.wte(input_tokens)
+        past_key_values = self.control_trans(temp_control)
+        seqlen, _ = past_key_values.shape
+
+        past_key_values = past_key_values.view(seqlen, len(self.module_list) * 2, self.module_list[0].hidden_dim)
+        past_key_values = self.dropout(past_key_values)
+        past_key_values = past_key_values.permute([1, 0, 2]).split(2)
+
+        for module_id, module in enumerate(self.module_list):
+            module.past_key_reparam = past_key_values[module_id][0]
+            module.past_value_reparam = past_key_values[module_id][1]
+
+    def pre_forward(self, *args, **kwargs):
+        r""" First forward through the reparameterization network, then continue with the normal forward pass of the PLM.
+        """
+        self.allocate_parameter()
+        return args, kwargs
+
+    def define_reparameterization_network(self) -> None:
+        r""" Build the reparameterization network.
+        """
+        self.input_tokens = nn.Parameter(torch.arange(self.prefix_token_num).long(), requires_grad=False) # to allow automatic device placement
+        self.wte = nn.Embedding(self.prefix_token_num, self.embed_dim)
+        self.control_trans = nn.Sequential(
+            nn.Linear(self.embed_dim, self.mid_dim),
+            nn.Tanh(),
+            nn.Linear(self.mid_dim, self.total_parameters_num//self.prefix_token_num)
+        )
+
+
+class PrefixConfig(BaseDeltaConfig):
+    def __init__(
+        self,
+        prefix_token_num=6,
+        reparameterize=True,
+        embed_dim: Optional[int]=512,
+        mid_dim: Optional[int]=512,
+        **kwargs
+    ):
+        super().__init__(**kwargs)
+        arg_names = get_arg_names_inside_func(self.__init__)
+        for arg_name in arg_names:
+            if not hasattr(self, arg_name): # the arg has not been registered in parent config
+                setattr(self, arg_name, locals()[arg_name])
+
+
+
+
+class PrefixModel(DeltaBase):
+    r""" The implementation of `Prefix-Tuning: Optimizing Continuous Prompts for Generation <https://arxiv.org/abs/2101.00190>`_ .
+    However, as the attention blocks of different PLMs differ substantially, e.g., in their input arguments and in the naming
+    convention of ``past_key_value``, we have to implement a different prefix layer for each PLM. Given this inconvenience at the
+    code level, we only support several commonly used backbone models (currently: T5, DistilBERT, BERT, RoBERTa, GPT-2,
+    BART). If you are trying to apply delta tuning to other backbone models, we suggest trying other delta models,
+    or implementing it yourself and making a pull request.
+
+    Experimental Feature:
+
+        Support inserting prefix tokens before only a subset of layers, e.g., layers 3, 4, 6, and 10, while leaving the other
+        layers untouched.
+
+    .. note::
+
+        If reparameterization is used, the trainable parameters live in the reparameterization network (attached to the first
+        prefix layer) rather than in the prefix itself. A function that saves only the generated prefix parameters will be
+        added in the next version.
+
+
+    Args:
+        backbone_model (:obj:`transformers.PretrainedModels`): The backbone model to be modified.
+ prefix_token_num (:obj:`int`): the number of prefix token + reparameterize (:obj:`bool`): Whether use the reparameterization for prefix tuning. + embed_dim (:obj:`int`): The embeding dimension of prefix token when using the reparameterization. + mid_dim (:obj:`int`): The dimension of the hiddens of the reparameterization network. + modified_modules (:obj:`List[str]`): For prefix tuning, the it must refer to an attention layer (Currently, only + the implemented ones) + unfrozen_modules (:obj:`List[str]`, *optional*, default to :obj:`None`): The modules that should be unfrozen + together with the prefix parameters. + common_structure (:obj:`bool`): whether using name-based addressing witha common structure mapping. + + """ + config_class = PrefixConfig + delta_type = "prefix" + default_modified_modules = ['attn'] + def __init__(self, + backbone_model: nn.Module, + prefix_token_num=6, + reparameterize=True, + embed_dim: Optional[int]=512, + mid_dim: Optional[int]=512, + modified_modules: Optional[List[str]] = None, + unfrozen_modules: Optional[List[str]] = None, + common_structure: Optional[bool] = None, + interactive_modify: Optional[Union[bool, int]] = False, + ): + DeltaBase.__init__(self, + backbone_model, + modified_modules=modified_modules, + unfrozen_modules=unfrozen_modules, + common_structure=common_structure, + interactive_modify=interactive_modify, + ) + arg_names = get_arg_names_inside_func(self.__init__) + for arg_name in arg_names: + if not hasattr(self, arg_name): # not registered in parent class + setattr(self, arg_name, locals()[arg_name]) + + self.delta_modules = nn.ModuleList() + + self.add_all_delta_to_backbone(self.backbone_model, + self.modified_modules, + ) + + def add_all_delta_to_backbone(self, + module: nn.Module, + modified_modules: List[str], + ) -> nn.Module: + first_modified_module = None + # Current, We assume the layerer are in order in named_modules. + # Thus the first modified module is the first module that the tensor flows to. + for key, _ in module.named_modules(): + if self.find_key(key, modified_modules): + logger.debug("find key {}".format(key)) + if first_modified_module is None: + _, _, ref = self.find_module(module, key) + first_modified_module = ref + self.update_module(module, key) + + self._pseudo_data_to_instantiate(module) + + if self.reparameterize: + reparams = ReparameterizeFunction(prefix_token_num=self.prefix_token_num, + embed_dim=self.embed_dim, + mid_dim=self.mid_dim, + module_list=self.delta_modules) + self.delta_modules = None + self.reparams = reparams + self.insert_sequential_module(first_modified_module, delta_module=reparams, name="reparams", strict=False) + self.mark_as_delta() + return module + + + + def update_module(self, module: nn.Module, key: str): + _, _, ref = self.find_module(module, key) + + prefixlayer, ref = self.new_module_like(ref) + self.insert_sequential_module(ref, delta_module=prefixlayer, name="prefix") + self.delta_modules.append(prefixlayer) + + def new_module_like(self, module): + # TODO: support more Attention modules + + if isinstance(module, T5Attention) or isinstance(module, T5LayerSelfAttention): + if isinstance(module, T5LayerSelfAttention): + module = module.SelfAttention # innermodule + module_device = get_device(module) + prefixlayer = PrefixLayerT5(prefix_token_num=self.prefix_token_num, num_heads=module.n_heads ,device=module_device) + elif isinstance(module, MultiHeadSelfAttention): # MultiHeadSelfAttention didn't provide past_key_value in the interface of the forward function. 
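+            # DistilBERT's MultiHeadSelfAttention exposes no ``past_key_value`` argument, so instead of
+            # injecting the prefix through kwargs, the prefix layer wraps the ``k_lin``/``v_lin`` projections
+            # below and prepends its own prefix keys/values to their outputs.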
+ module_device = get_device(module) + prefixlayer = PrefixLayerDistilBert(prefix_token_num=self.prefix_token_num, device=module_device) + self.insert_sequential_module(getattr(module, "k_lin"), pre_caller=prefixlayer.key_pre_forward, post_caller=prefixlayer.key_forward) + self.insert_sequential_module(getattr(module, "v_lin"), pre_caller=prefixlayer.value_pre_forward, post_caller=prefixlayer.value_forward) + elif isinstance(module, BertSelfAttention): + raise NotImplementedError + elif isinstance(module, RobertaAttention): + module_device = get_device(module) + prefixlayer = PrefixLayerRoberta(prefix_token_num=self.prefix_token_num, num_heads=module.self.num_attention_heads,device=module_device) + elif isinstance(module, GPT2Attention): + module_device = get_device(module) + prefixlayer = PrefixLayerGPT2(prefix_token_num=self.prefix_token_num, num_heads=module.num_heads ,device=module_device) + elif isinstance(module, BartAttention): + module_device = get_device(module) + prefixlayer = PrefixLayerBart(prefix_token_num=self.prefix_token_num, num_heads=module.num_heads ,device=module_device) + else: + raise NotImplementedError(type(module)) + return prefixlayer, module + + + + + + + + + + + + + \ No newline at end of file diff --git a/opendelta/delta_models/soft_prompt.py b/opendelta/delta_models/soft_prompt.py new file mode 100644 index 0000000..0d2fd21 --- /dev/null +++ b/opendelta/delta_models/soft_prompt.py @@ -0,0 +1,209 @@ +from opendelta.utils.signature import get_arg_names, get_arg_names_inside_func +from opendelta.utils.name_based_addressing import * +from opendelta.utils.cuda import get_device +from opendelta.basemodel import DeltaBase +from typing import * +import torch +import torch.nn as nn +from opendelta import BaseDeltaConfig +from decorator import decorate +import torch.nn.functional as F +from opendelta import logging +logger = logging.get_logger(__name__) + +class SoftPromptConfig(BaseDeltaConfig): + r""" + This is the configuration class to store the configuration of a :py:class:`SoftPromptModel` + + """ + def __init__( + self, + soft_token_num=100, + init_range = 0.5, + token_init = True, + **kwargs + ): + super().__init__(**kwargs) + arg_names = get_arg_names_inside_func(self.__init__) + for arg_name in arg_names: + if not hasattr(self, arg_name): # the arg has not been registered in parent config + setattr(self, arg_name, locals()[arg_name]) + + + +class SoftPromptLayer(nn.Module): + r"""This is the implementation of `The Power of Scale for Parameter-Efficient + Prompt Tuning `_ . Similar to :obj:`PrefixTuningTemplate`, + This template also does not need any textual template. Addition tokens are directly + concatenated into the input ids. There are two initializations of the new tokens. + (1). random initialization. (2) initialize with the tokens of the plm (We simply take + the first n_tokens similar to their implementation). + + Note that this template can be simply achieved by :obj:`SoftManualTemplate`, in which + you set ``n_token`` tokens template before the will give the same result. 
+ """ + + def __init__(self, + soft_token_num: int = 100, + raw_embedding: Optional[torch.Tensor] = None, + init_range: Optional[float] = 0.5, + token_init = False, + pad_id = 0, + device: Optional[str]=None, + ): + super().__init__() + self.__dict__['raw_embedding'] = raw_embedding + + self.init_range = init_range + self.num_tokens = soft_token_num + self.pad_id = pad_id + self.token_init = token_init + self.device = device + + assert self.num_tokens>0 + self.instantiate(raw_embedding(torch.tensor([0])).shape[-1]) + + def pre_forward(self, *args, **kwargs): + # if attention_mask is passed as PLM's input, modify it here + if 'encoder_outputs' in kwargs and kwargs['encoder_outputs'] is not None: + # In generation, the input is forward through the model again. + return args, kwargs + + if 'input_ids' in kwargs: + input_ids = kwargs['input_ids'] + kwargs['input_ids'] = None + elif len(args) > 0: + input_ids = args[0] + args = args[1:] + else: + input_ids = None + + + if 'attention_mask' not in kwargs or kwargs['attention_mask'] is None: + # infer attention mask + if input_ids is None: + raise RuntimeError("no input ids found") + kwargs['attention_mask'] = (input_ids != self.pad_id).to(torch.int64) + + if 'inputs_embeds' not in kwargs or kwargs['inputs_embeds'] is None: + try: + inputs_embeds = self.raw_embedding(input_ids) + except: + raise RuntimeError("neither inputs_embeds nor input_ids is specified.") + else: + inputs_embeds = kwargs['inputs_embeds'] + + + + batch_size = inputs_embeds.size(0) + soft_embeds = self.soft_embeds.repeat(batch_size, 1, 1) + inputs_embeds = torch.cat([soft_embeds, inputs_embeds], 1) + kwargs['inputs_embeds'] = inputs_embeds + + am = kwargs['attention_mask'] + am.data = torch.cat([torch.ones((*am.shape[:-1], inputs_embeds.shape[-2]-am.shape[-1]), dtype = am.dtype,device=am.device), am], dim=-1) + + return args, kwargs + + def instantiate(self, hidden_dim) -> None: + """ + generate parameters needed for soft tokens embedding in soft-prompt + for soft tokens, use a new embedding layer which is initialized with their corresponding embedding of hard tokens + """ + soft_embeds = torch.FloatTensor(self.num_tokens, hidden_dim) + if self.token_init: + soft_embeds.data = torch.clone(self.raw_embedding(torch.tensor([i for i in range(self.num_tokens)]))) + else: + soft_embeds = soft_embeds.uniform_(-self.init_range, self.init_range) + + self.soft_embeds = nn.Parameter(soft_embeds, requires_grad=True).to(self.device) + + +class SoftPromptModel(DeltaBase): + r""" + This is the implementation of `The Power of Scale for Parameter-Efficient + Prompt Tuning `_ . Similar to :obj:`PrefixTuningTemplate`, + This template also does not need any textual template. Addition tokens are directly + concatenated into the input ids. There are two initializations of the new tokens. + (1). random initialization. (2) initialize with the tokens of the plm (We simply take + the first n_tokens similar to their implementation). + + Note that this template can be simply achieved by :obj:`SoftManualTemplate`, in which + you set ``n_token`` tokens template before the will give the same result. + + Args: + backbone_model (:obj:`transformers.PretrainedModels`): The backbone model to be modified. + soft_token_num (:obj:`int`, *optional*): num of new tokens to add in the front of the input. + init_range (:obj:`bool`, *optional*): If initialize new tokens randomly, the random range of uniform distribution. 
+        token_init (:obj:`bool`, *optional*, defaults to :obj:`True`): Whether to initialize the new tokens with the
+            embeddings of the PLM's first tokens.
+        modified_modules (:obj:`List[str]`): Not really used for soft prompt tuning, since the soft prompt layer is always
+            attached to the root of the backbone model.
+        unfrozen_modules (:obj:`List[str]`, *optional*, defaults to :obj:`None`): The modules that should be unfrozen
+            together with the prefix parameters.
+        common_structure (:obj:`bool`): whether to use name-based addressing with a common structure mapping.
+
+    """
+    config_class = SoftPromptConfig
+    delta_type = "soft_prompt"
+    default_modified_modules = ["root"] # not used
+    def __init__(self,
+                 backbone_model: nn.Module,
+                 soft_token_num=100,
+                 init_range = 0.5,
+                 token_init=True,
+                 modified_modules: Optional[bool] = None,
+                 unfrozen_modules: Optional[bool] = None,
+                 common_structure: Optional[bool] = None,
+                 interactive_modify: Optional[Union[bool, int]] = False,
+                ):
+        DeltaBase.__init__(self,
+                           backbone_model = backbone_model,
+                           modified_modules = ["root"],
+                           unfrozen_modules = unfrozen_modules,
+                           common_structure = False,
+                           interactive_modify = interactive_modify,
+                           )
+
+        arg_names = get_arg_names_inside_func(self.__init__)
+        for arg_name in arg_names:
+            if not hasattr(self, arg_name): # not registered in parent class
+                setattr(self, arg_name, locals()[arg_name])
+
+
+        try:
+            self.__dict__['raw_embedding'] = self.backbone_model.get_input_embeddings()
+        except AttributeError:
+            raise AttributeError(f"'{type(self.backbone_model)}' object has no attribute 'get_input_embeddings', please pass "+
+                                 "input embeddings into 'self.raw_embedding' in user-specific ways.")
+
+        self.delta_modules = nn.ModuleList()
+        self.add_all_delta_to_backbone(self.backbone_model,
+                                       self.modified_modules,
+                                       )
+
+    def add_all_delta_to_backbone(self,
+                                  module: nn.Module,
+                                  modified_modules: List[str],
+                                 ) -> nn.Module:
+        self.update_module()
+        self.mark_as_delta()
+        return module
+
+    def update_module(self):
+        soft_prompt_layer = self.new_module_like(self.raw_embedding)
+        self.insert_sequential_module(self.backbone_model.get_encoder() if self.backbone_model.config.is_encoder_decoder else self.backbone_model,
+                                      delta_module=soft_prompt_layer,
+                                      name="soft_prompt_layer" )
+
+    def new_module_like(self, module):
+        module_device = get_device(module)
+        soft_prompt_layer = SoftPromptLayer(
+            soft_token_num = self.soft_token_num,
+            raw_embedding = self.raw_embedding,
+            token_init = self.token_init,
+            init_range = self.init_range,
+            device = module_device,
+        )
+        self.delta_modules.append(soft_prompt_layer)
+        return soft_prompt_layer
+
\ No newline at end of file
diff --git a/opendelta/utils/__init__.py b/opendelta/utils/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/opendelta/utils/cuda.py b/opendelta/utils/cuda.py
new file mode 100644
index 0000000..8f9d535
--- /dev/null
+++ b/opendelta/utils/cuda.py
@@ -0,0 +1,58 @@
+from typing import Union
+import torch.nn as nn
+
+def get_device(module : Union[nn.Module, nn.Parameter]):
+    if not (isinstance(module, nn.Module) \
+            or isinstance(module, nn.Parameter)):
+        raise RuntimeError("module is not an instance of torch.nn.Module or torch.nn.Parameter")
+    if hasattr(module, 'device'):
+        return module.device
+    else:
+        params_devices = [p.device for p in module.parameters()]
+        if len(params_devices) == 0:
+            return None
+        elif len(set(params_devices))==1:
+            return params_devices[0]
+        else:
+            raise RuntimeError("The module is parallelized across devices, please get the device of an inner module")
+
+
+# unit test, should be removed later
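+# Illustrative usage (a minimal sketch): the delta layers call ``get_device`` on the module they
+# modify so that newly created parameters are placed on the same device, e.g.
+#
+#     module_device = get_device(attention_module)
+#     past_key = nn.Parameter(torch.randn(prefix_token_num, hidden_dim, device=module_device))
+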
+if __name__ == "__main__": + import torch + import torch.nn as nn + + a = nn.Parameter(torch.randn(3,5)) + + class MyNet(nn.Module): + def __init__(self): + super().__init__() + + class MyNet2(nn.Module): + def __init__(self): + super().__init__() + self.l1 = nn.Linear(3,5).to('cuda:2') + self.l2 = nn.Linear(3,5).to('cuda:2') + + class MyNet3(nn.Module): + def __init__(self): + super().__init__() + self.l1 = nn.Linear(3,5).to('cuda:3') + self.l2 = nn.Linear(3,5).cuda() + + class MyNet4: + pass + + b = MyNet() + c = MyNet2() + d = MyNet3() + e = MyNet4() + + print(get_device(a)) + print(get_device(b)) + print(get_device(c)) + print(get_device(e)) + print(get_device(d)) + + + diff --git a/opendelta/utils/data_parallel.py b/opendelta/utils/data_parallel.py new file mode 100644 index 0000000..973d21f --- /dev/null +++ b/opendelta/utils/data_parallel.py @@ -0,0 +1,39 @@ +# This utils is used to support Using pytorch's native DataParallel method, +# which create several backbone model inside DataParallel. +# DistributedDataParallel doesn't need this function. +from opendelta.utils.decorate import decorate +from collections import OrderedDict + +def new_replicate_for_data_parallel(self): + r""" self is the parent module. + """ + # rewrite the replicate in DataParallel. + def _caller(_org_func, org_module, delta_name, *args, **kwargs): + args = args[1:] # the first argument here is ``self`` + delta_module = getattr(org_module, delta_name) + if hasattr(delta_module, "pre_forward"): + args, kwargs = delta_module.pre_forward(*args, **kwargs) + ret = _org_func(*args, **kwargs) + if hasattr(delta_module, "post_forward"): + ret = delta_module.post_forward(ret) + return ret + replica = self.__new__(type(self)) + org_forward = replica.forward + replica.__dict__ = self.__dict__.copy() + assert replica.forward != org_forward + replica.__dict__['forward'] = org_forward + + + for _delta_info in self._delta_infos: + if _delta_info['method'] == "insert_sequential" and _delta_info['state'] == "on": + new_forward = decorate(replica.forward, _caller, extras=(replica, _delta_info['delta_name']), kwsyntax=True) + replica.__dict__['forward'] = new_forward.__get__(replica, type(replica)) + + # replicas do not have parameters themselves, the replicas reference the original + # module. + replica._parameters = OrderedDict() + replica._buffers = replica._buffers.copy() + replica._modules = replica._modules.copy() + replica._is_replica = True + + return replica \ No newline at end of file diff --git a/opendelta/utils/decorate.py b/opendelta/utils/decorate.py new file mode 100644 index 0000000..d8782e5 --- /dev/null +++ b/opendelta/utils/decorate.py @@ -0,0 +1,75 @@ +# copied and modified from decorator.decorate + +import re +import sys +import inspect +import operator +import itertools +from contextlib import _GeneratorContextManager +from inspect import getfullargspec, iscoroutinefunction, isgeneratorfunction + +def fix(args, kwargs, sig): + """ + Fix args and kwargs to be consistent with the signature + """ + ba = sig.bind(*args, **kwargs) + ba.apply_defaults() # needed for test_dan_schult + return ba.args, ba.kwargs + + +def decorate(func, caller, extras=(), kwsyntax=False): + """ + Decorates a function/generator/coroutine using a caller. + If kwsyntax is True calling the decorated functions with keyword + syntax will pass the named arguments inside the ``kw`` dictionary, + even if such argument are positional, similarly to what functools.wraps + does. By default kwsyntax is False and the the arguments are untouched. 
+ + **The difference between this function and decorator.decorate is that + is support nested decorate. + """ + sig = inspect.signature(func) + if iscoroutinefunction(caller): + async def fun(*args, **kw): + if not kwsyntax: + args, kw = fix(args, kw, sig) + return await caller(func, *(extras + args), **kw) + elif isgeneratorfunction(caller): + def fun(*args, **kw): + if not kwsyntax: + args, kw = fix(args, kw, sig) + for res in caller(func, *(extras + args), **kw): + yield res + else: + def fun(*args, **kw): + if not kwsyntax: + args, kw = fix(args, kw, sig) + return caller(func, *(extras + args), **kw) + fun.__name__ = func.__name__ + fun.__doc__ = func.__doc__ + __wrapped__ = func # support nested wrap + fun.__signature__ = sig + fun.__qualname__ = func.__qualname__ + # builtin functions like defaultdict.__setitem__ lack many attributes + try: + fun.__defaults__ = func.__defaults__ + except AttributeError: + pass + try: + fun.__kwdefaults__ = func.__kwdefaults__ + except AttributeError: + pass + try: + fun.__annotations__ = func.__annotations__ + except AttributeError: + pass + try: + fun.__module__ = func.__module__ + except AttributeError: + pass + try: + fun.__dict__.update(func.__dict__) + except AttributeError: + pass + fun.__wrapped__ = __wrapped__ # support nested wrap + return fun \ No newline at end of file diff --git a/opendelta/utils/delta_hub.py b/opendelta/utils/delta_hub.py new file mode 100644 index 0000000..d0da33e --- /dev/null +++ b/opendelta/utils/delta_hub.py @@ -0,0 +1,24 @@ + + +def create_hub_repo_name(root = "DeltaHub", + dataset = None, + delta_type = None, + model_name_or_path = None, + ): + r"""Currently, it's only a simple concatenation of the arguments. + """ + repo_name = [] + + repo_name.append(f"{delta_type}") + model_name_or_path = model_name_or_path.split("/")[-1] + repo_name.append(f"{model_name_or_path}") + repo_name.append(f"{dataset}") + + repo_name = "_".join(repo_name) + + repo_name = root+"/"+repo_name + return repo_name + + + + diff --git a/opendelta/utils/interactive/__init__.py b/opendelta/utils/interactive/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/opendelta/utils/interactive/templates/index.html b/opendelta/utils/interactive/templates/index.html new file mode 100644 index 0000000..bf6d411 --- /dev/null +++ b/opendelta/utils/interactive/templates/index.html @@ -0,0 +1,176 @@ +$def with (content) + + + + + + + + + + +$:content + + + + + diff --git a/opendelta/utils/interactive/web.py b/opendelta/utils/interactive/web.py new file mode 100644 index 0000000..a7da490 --- /dev/null +++ b/opendelta/utils/interactive/web.py @@ -0,0 +1,127 @@ +from opendelta.utils.visualization import Visualization +import web +import re, os + +space = " " +prefix0 = space * 9 +prefix1 = f"│"+space*5 +prefix2 = f"├─{space}" +prefix3 = f"└─{space}" + +def colorfy(label): + i = 0 + res = "" + while i < len(label): + if label[i] == '[': + color = "" + i += 1 + while label[i] != ']': + color += label[i] + i += 1 + i += 1 + if color[0].isdigit(): # dims but not color + res += f'[{color}]' + else: + if res != "": res += '' + res += f'' + else: + res += label[i] + i += 1 + res += '' + return res + +compressed_pattern_1 = re.compile("[0-9]+-[0-9]+") +compressed_pattern_2 = re.compile(".+(,.+)+") + +def expand_part(part): + res = [] + if compressed_pattern_1.fullmatch(part): + st, ed = map(int, part.split('-')) + for i in range(st, ed+1): + res.append( str(i) ) + elif compressed_pattern_2.fullmatch(part): + for c in part.split(','): + res.append( c ) 
+ else: + res.append( part ) + return res + +def dfs(o, depth, last, old_name): + html = "" + module_names = expand_part(o.module_name) + if depth > 0: + old_last_1 = last[-1] + if len(module_names) > 1: + module_names = [o.module_name] + module_names + for ith, module_name in enumerate(module_names): + if ith == 0: + html += f'
' + elif ith == 1: + html += f'
' + + for i in range(depth-1): + html += prefix0 if last[i] else prefix1 + if depth > 0: + last[-1] = old_last_1 & (ith == 0 or ith == len(module_names)-1) + html += prefix3 if last[-1] else prefix2 + length = len(o.children) + if length > 0: + html += f'' + name = old_name + module_name + if ith > 0: + label = f'[red]{module_name}{o.label[o.label.index("[",1):]}' + else: + label = o.label + html += f'' + if len(module_names) > 1 and ith == 0: + html += '' + html += '
' + html += f'
' + for i, child in enumerate(o.children): + last = last + [i == length-1] + html += dfs(child, depth+1, last, name + ".") + last.pop() + + html += "
" + if ith == 0 or (ith > 1 and ith == len(module_names)-1): + html += "
" + return html + +urls = ( + '/submit/(.*)', 'submit', + '/(.*)', 'hello', +) + +class PortApplication(web.application): + def run(self, port=8080, *middleware): + func = self.wsgifunc(*middleware) + return web.httpserver.runsimple(func, ('0.0.0.0', port)) + +app = PortApplication(urls, globals()) +render = web.template.render(os.path.join(os.path.dirname(__file__), 'templates/')) +names = [] + +class hello: + def GET(self, name): + return render.index(content=html) +class submit: + def GET(self, _): + global names + names = [name.strip("root.") for name in web.input().name.split(";")] + app.stop() + +def interactive(model, port=8888): + tree = Visualization(model).structure_graph(printTree=False) + + global html + html = dfs(tree, 0, [], "") + + print() + print("If on your machine, open the link below for interactive modification.\n " + "If on remote host, you could use port mapping, " + "or run in vscode terminal, which automatically do port mapping for you.") + app.run() + global names + print("modified_modules:") + print(names) + return names diff --git a/opendelta/utils/logging.py b/opendelta/utils/logging.py new file mode 100644 index 0000000..727232d --- /dev/null +++ b/opendelta/utils/logging.py @@ -0,0 +1,278 @@ +# coding=utf-8 +# Copyright 2020 Optuna, Hugging Face +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# OpenDelta copied from Huggingface Transformers +""" Logging utilities.""" + +import logging +import os +import sys +import threading +from logging import CRITICAL # NOQA +from logging import DEBUG # NOQA +from logging import ERROR # NOQA +from logging import FATAL # NOQA +from logging import INFO # NOQA +from logging import NOTSET # NOQA +from logging import WARN # NOQA +from logging import WARNING # NOQA +from typing import Optional + + +_lock = threading.Lock() +_default_handler: Optional[logging.Handler] = None + +log_levels = { + "debug": logging.DEBUG, + "info": logging.INFO, + "warning": logging.WARNING, + "error": logging.ERROR, + "critical": logging.CRITICAL, +} + +_default_log_level = logging.INFO + + +def _get_default_logging_level(): + """ + If TRANSFORMERS_VERBOSITY env var is set to one of the valid choices return that as the new default level. If it is + not - fall back to ``_default_log_level`` + """ + env_level_str = os.getenv("TRANSFORMERS_VERBOSITY", None) + if env_level_str: + if env_level_str in log_levels: + return log_levels[env_level_str] + else: + logging.getLogger().warning( + f"Unknown option TRANSFORMERS_VERBOSITY={env_level_str}, " + f"has to be one of: { ', '.join(log_levels.keys()) }" + ) + return _default_log_level + + +def _get_library_name() -> str: + + return __name__.split(".")[0] + + +def _get_library_root_logger() -> logging.Logger: + + return logging.getLogger(_get_library_name()) + + +def _configure_library_root_logger() -> None: + + global _default_handler + + with _lock: + if _default_handler: + # This library has already configured the library root logger. 
+ return + _default_handler = logging.StreamHandler() # Set sys.stderr as stream. + _default_handler.flush = sys.stderr.flush + formatter = logging.Formatter( + "[%(levelname)s|(OpenDelta)%(module)s:%(lineno)d]%(asctime)s >> %(message)s") + _default_handler.setFormatter(formatter) + + # Apply our default configuration to the library root logger. + library_root_logger = _get_library_root_logger() + library_root_logger.addHandler(_default_handler) + library_root_logger.setLevel(_get_default_logging_level()) + + + library_root_logger.propagate = False + + +def _reset_library_root_logger() -> None: + + global _default_handler + + with _lock: + if not _default_handler: + return + + library_root_logger = _get_library_root_logger() + library_root_logger.removeHandler(_default_handler) + library_root_logger.setLevel(logging.NOTSET) + _default_handler = None + + +def get_log_levels_dict(): + return log_levels + + +def get_logger(name: Optional[str] = None) -> logging.Logger: + """ + Return a logger with the specified name. + This function is not supposed to be directly accessed unless you are writing a custom transformers module. + """ + + if name is None: + name = _get_library_name() + + _configure_library_root_logger() + return logging.getLogger(name) + + +def get_verbosity() -> int: + """ + Return the current level for the 🤗 Transformers's root logger as an int. + Returns: + :obj:`int`: The logging level. + + 🤗 Transformers has following logging levels: + - 50: ``transformers.logging.CRITICAL`` or ``transformers.logging.FATAL`` + - 40: ``transformers.logging.ERROR`` + - 30: ``transformers.logging.WARNING`` or ``transformers.logging.WARN`` + - 20: ``transformers.logging.INFO`` + - 10: ``transformers.logging.DEBUG`` + """ + + _configure_library_root_logger() + return _get_library_root_logger().getEffectiveLevel() + + +def set_verbosity(verbosity: int) -> None: + """ + Set the verbosity level for the 🤗 Transformers's root logger. 
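+
+    Example (a minimal sketch; OpenDelta exposes this module as ``opendelta.logging``):
+
+    .. code-block:: python
+
+        from opendelta import logging
+
+        logging.set_verbosity(logging.WARNING)  # only show warnings and errors
+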
+ Args: + verbosity (:obj:`int`): + Logging level, e.g., one of: + - ``transformers.logging.CRITICAL`` or ``transformers.logging.FATAL`` + - ``transformers.logging.ERROR`` + - ``transformers.logging.WARNING`` or ``transformers.logging.WARN`` + - ``transformers.logging.INFO`` + - ``transformers.logging.DEBUG`` + """ + + _configure_library_root_logger() + _get_library_root_logger().setLevel(verbosity) + + +def set_verbosity_info(): + """Set the verbosity to the ``INFO`` level.""" + return set_verbosity(INFO) + + +def set_verbosity_warning(): + """Set the verbosity to the ``WARNING`` level.""" + return set_verbosity(WARNING) + + +def set_verbosity_debug(): + """Set the verbosity to the ``DEBUG`` level.""" + return set_verbosity(DEBUG) + + +def set_verbosity_error(): + """Set the verbosity to the ``ERROR`` level.""" + return set_verbosity(ERROR) + + +def disable_default_handler() -> None: + """Disable the default handler of the HuggingFace Transformers's root logger.""" + + _configure_library_root_logger() + + assert _default_handler is not None + _get_library_root_logger().removeHandler(_default_handler) + + +def enable_default_handler() -> None: + """Enable the default handler of the HuggingFace Transformers's root logger.""" + + _configure_library_root_logger() + + assert _default_handler is not None + _get_library_root_logger().addHandler(_default_handler) + + +def add_handler(handler: logging.Handler) -> None: + """adds a handler to the HuggingFace Transformers's root logger.""" + + _configure_library_root_logger() + + assert handler is not None + _get_library_root_logger().addHandler(handler) + + +def remove_handler(handler: logging.Handler) -> None: + """removes given handler from the HuggingFace Transformers's root logger.""" + + _configure_library_root_logger() + + assert handler is not None and handler not in _get_library_root_logger().handlers + _get_library_root_logger().removeHandler(handler) + + +def disable_propagation() -> None: + """ + Disable propagation of the library log outputs. Note that log propagation is disabled by default. + """ + + _configure_library_root_logger() + _get_library_root_logger().propagate = False + + +def enable_propagation() -> None: + """ + Enable propagation of the library log outputs. Please disable the HuggingFace Transformers's default handler to + prevent double logging if the root logger has been configured. + """ + + _configure_library_root_logger() + _get_library_root_logger().propagate = True + + +def enable_explicit_format() -> None: + """ + Enable explicit formatting for every HuggingFace Transformers's logger. The explicit formatter is as follows: + ``` + [LEVELNAME|FILENAME|LINE NUMBER] TIME >> MESSAGE + ``` + All handlers currently bound to the root logger are affected by this method. + """ + handlers = _get_library_root_logger().handlers + + for handler in handlers: + formatter = logging.Formatter("[%(levelname)s|%(filename)s:%(lineno)s] %(asctime)s >> %(message)s") + handler.setFormatter(formatter) + + +def reset_format() -> None: + """ + Resets the formatting for HuggingFace Transformers's loggers. + All handlers currently bound to the root logger are affected by this method. 
+ """ + handlers = _get_library_root_logger().handlers + + for handler in handlers: + handler.setFormatter(None) + + +def warning_advice(self, *args, **kwargs): + """ + This method is identical to ``logger.warning()``, but if env var TRANSFORMERS_NO_ADVISORY_WARNINGS=1 is set, this + warning will not be printed + """ + no_advisory_warnings = os.getenv("TRANSFORMERS_NO_ADVISORY_WARNINGS", False) + if no_advisory_warnings: + return + self.warning(*args, **kwargs) + + +logging.Logger.warning_advice = warning_advice + +set_verbosity_debug() \ No newline at end of file diff --git a/opendelta/utils/model_md5.py b/opendelta/utils/model_md5.py new file mode 100644 index 0000000..3666295 --- /dev/null +++ b/opendelta/utils/model_md5.py @@ -0,0 +1,36 @@ +import hashlib + +def gen_model_hash(model, with_parameters=True): + r"""Get model hash (structure and parameter) + """ + str_model_structure = str(model).replace("\n","").replace(" ","").replace("\t","").encode('utf-8') + md5 = hashlib.md5(str_model_structure) + + if with_parameters: + md5 = gen_parameter_hash(model.parameters(), md5=md5) + + md5_code = md5.hexdigest() + return md5_code + + + +def gen_parameter_hash(generator, md5=None): + r"""Get parameter hash. From https://zhuanlan.zhihu.com/p/392942816 + + """ + if md5 is None: + md5 = hashlib.md5() + for arg in generator: + x = arg.data + if hasattr(x, "cpu"): + md5.update(x.cpu().numpy().data.tobytes()) + elif hasattr(x, "numpy"): + md5.update(x.numpy().data.tobytes()) + elif hasattr(x, "data"): + md5.update(x.data.tobytes()) + else: + try: + md5.update(x.encode("utf-8")) + except: + md5.update(str(x).encode("utf-8")) + return md5 \ No newline at end of file diff --git a/opendelta/utils/name_based_addressing.py b/opendelta/utils/name_based_addressing.py new file mode 100644 index 0000000..3de2142 --- /dev/null +++ b/opendelta/utils/name_based_addressing.py @@ -0,0 +1,71 @@ +from typing import List +import re +def superstring_in(str_a: str , list_b: List[str]): + r"""check whether there is any string in list b containing str_a. + + Args: + Returns: + """ + return any(str_a in str_b for str_b in list_b) + +def is_child_key(str_a: str , list_b: List[str]): + r"""check whether a string in ``list_b`` is the child key in ``str_a`` + + Args: + Returns: + """ + return any(str_b in str_a and (str_b==str_a or str_a[len(str_b)]==".") for str_b in list_b) + +def endswith_in(str_a: str , list_b: List[str]): + r"""check whether ``str_a`` has a substring that is in list_b. + + Args: + Returns: + """ + return any(str_a.endswith(str_b) and (str_a==str_b or str_a[-len(str_b)-1] == ".") for str_b in list_b) + +def substring_in(str_a: str , list_b: List[str]): + r"""check whether ``str_a`` has a substring that is in list_b. + + Args: + Returns: + """ + token_a = str_a.split(".") + for str_b in list_b: + token_b = str_b.split(".") + for i in range(len(token_a)-len(token_b)+1): + if "".join(token_a[i:i+len(token_b)]) == "".join(token_b): + return True + return False + +def endswith_in_regex(str_a: str , list_b: List[str]): + r"""check whether ``str_a`` has a substring that is in list_b. + + Args: + Returns: + """ + for str_b in list_b: + ret = re.search(str_b, str_a) + if ret is not None: + b = ret.group() + if ret.span()[1] == len(str_a) and (b == str_a or (str_a==b or str_a[-len(b)-1] == ".")): + # the latter is to judge whether it is a full sub key in the str_a, e.g. 
str_a=`attn.c_attn` and list_b=[`attn`] will given False + return True + return False + +def substring_in_regex(str_a: str , list_b: List[str]): + r"""check whether ``str_a`` has a substring that is in list_b. + + Args: + Returns: + """ + for str_b in list_b: + ret = re.search(str_b, str_a) + if ret is not None: + b = ret.group() + if (ret.span()[0] == 0 or str_a[ret.span()[0]-1] == ".") and \ + (ret.span()[1] == len(str_a) or str_a[ret.span()[1]] == "."): #and b == str_a and (str_a==b or str_a[-len(b)-1] == "."): + # the latter is to judge whether it is a full sub key in the str_a, e.g. str_a=`attn.c_attn` and list_b=[`attn`] will given False + return True + return False + \ No newline at end of file diff --git a/opendelta/utils/saving_loading_utils.py b/opendelta/utils/saving_loading_utils.py new file mode 100644 index 0000000..eaeac58 --- /dev/null +++ b/opendelta/utils/saving_loading_utils.py @@ -0,0 +1,396 @@ + +from io import RawIOBase +from tarfile import HeaderError +from typing import Union, Optional, Callable +from opendelta.delta_configs import BaseDeltaConfig +from opendelta.utils.model_md5 import gen_model_hash +import torch +import os +from opendelta import logging +import torch.nn as nn +from transformers.file_utils import ( + WEIGHTS_NAME, + PushToHubMixin, + is_offline_mode, + is_remote_url, + hf_bucket_url, + cached_path, + ) +from transformers.utils.dummy_pt_objects import PreTrainedModel +import hashlib + +logger = logging.get_logger(__name__) + +class SaveLoadMixin(PushToHubMixin): + def add_configs_when_saving(self,): + self.config.backbone_class = self.backbone_model.__class__.__name__ + self.config.backbone_checkpoint_name = os.path.split(self.backbone_model.config._name_or_path.strip("/"))[-1] + self.config.backbone_hash = gen_model_hash(self.backbone_model) + + + + + def save_finetuned( + self, + save_directory: Optional[Union[str, os.PathLike]] = "./output/", + save_config: bool = True, + state_dict: Optional[dict] = None, + save_function: Callable = torch.save, + push_to_hub: bool = False, + **kwargs, + ): + r""" + Save a model and its configuration file to a directory, so that it can be re-loaded using the + :py:meth:`~DeltaBase.from_finetuned` class method. + + Arguments: + save_directory (:obj:`str` or :obj:`os.PathLike`): + Directory to which to save. Will be created if it doesn't exist. + save_config (:obj:`bool`, *optional*, defaults to :obj:`True`): + Whether or not to save the config of the model. Useful when in distributed training like TPUs and need + to call this function on all processes. In this case, set ``save_config=True`` only on the main process + to avoid race conditions. + state_dict (nested dictionary of :obj:`torch.Tensor`): + The state dictionary of the model to save. Will default to ``self.state_dict()``, but can be used to only + save parts of the model or if special precautions need to be taken when recovering the state dictionary + of a model (like when using model parallelism). + save_function (:obj:`Callable`): + The function to use to save the state dictionary. Useful on distributed training like TPUs when one + need to replace ``torch.save`` by another method. + push_to_hub (:obj:`bool`, *optional*, defaults to :obj:`False`): + Whether or not to push your model to the HuggingFace model hub after saving it. + + .. 
tip:: + + Using ``push_to_hub=True`` will synchronize the repository you are pushing to with ``save_directory``, + which requires ``save_directory`` to be a local clone of the repo you are pushing to if it's an existing + folder. Pass along ``temp_dir=True`` to use a temporary directory instead. + + kwargs: + Additional key word arguments passed along to the :py:meth:`~file_utils.PushToHubMixin.push_to_hub` method. + + .. note:: + + You may need to install git-lfs on your machine. + + .. code-block:: bash + + wget -P ~ https://github.com/git-lfs/git-lfs/releases/download/v3.0.2/git-lfs-linux-amd64-v3.0.2.tar.gz + cd ~ + tar -xvzf git-lfs-linux-amd64-v3.0.2.tar.gz + export PATH=~:$PATH + git-lfs install + + """ + if os.path.isfile(save_directory): + logger.error(f"Provided path ({save_directory}) should be a directory, not a file") + return + + if push_to_hub: + commit_message = kwargs.pop("commit_message", None) + repo = self._create_or_get_repo(save_directory, **kwargs) + + os.makedirs(save_directory, exist_ok=True) + + # Only save the model itself if we are using distributed training + + model_to_save = self.backbone_model# unwrap_model(self) + + # Save the model + if state_dict is None: + state_dict = model_to_save.state_dict() + + # Save the config + if save_config: + if not hasattr(self, "config"): + self.create_config_from_model() + self.add_configs_when_saving() + self.config.save_finetuned(save_directory) + + # If we save using the predefined names, we can load using `from_pretrained` + output_model_file = os.path.join(save_directory, WEIGHTS_NAME) + save_function(state_dict, output_model_file) + + logger.info(f"Model weights saved in {output_model_file}") + + if push_to_hub: + url = self._push_to_hub(repo, commit_message=commit_message) + logger.info(f"Model pushed to the hub in this commit: {url}") + + @classmethod + def from_finetuned(cls, + finetuned_model_name_or_path: Optional[Union[str, os.PathLike]], + backbone_model: nn.Module, + *model_args, + check_hash: Optional[bool] = True, + **kwargs): + r""" + Instantiate a finetuned delta model from a path. + The backbone_model is set in evaluation mode by default using ``model.eval()`` (Dropout modules are deactivated). + To further train the model, you can use the :meth:`freeze_module ` method. + + Parameters: + + finetuned_model_name_or_path (:obj:`str` or :obj:`os.PathLike`, *optional*): + Can be either: + + - A string, the *model id* of a pretrained model hosted inside a model repo on huggingface.co. + Valid model ids can be located at the root-level, like ``bert-base-uncased``, or namespaced under a + user or organization name, like ``dbmdz/bert-base-german-cased``. + - A path to a *directory* containing model weights saved using + :meth:`SaveLoadMixin.save_finetuned`, e.g., ``./my_model_directory/``. + - A path or url to a *tensorflow index checkpoint file* (e.g, ``./tf_model/model.ckpt.index``). In + this case, ``from_tf`` should be set to ``True`` and a configuration object should be provided as + ``config`` argument. This loading path is slower than converting the TensorFlow checkpoint in a + PyTorch model using the provided conversion scripts and loading the PyTorch model afterwards. + - A path or url to a model folder containing a *flax checkpoint file* in *.msgpack* format (e.g, + ``./flax_model/`` containing ``flax_model.msgpack``). In this case, ``from_flax`` should be set to + ``True``. + - ``None`` if you are both providing the configuration and state dictionary (resp. 
with keyword + arguments ``config`` and ``state_dict``). + backbone_model (:obj:`torch.nn.Module`): The backbone model to be modified. + model_args (sequence of positional arguments, *optional*): + All remaining positional arguments will be passed to the underlying model's ``__init__`` method. + config (Union[:obj:`BaseDeltaConfig`, :obj:`str`, :obj:`os.PathLike`], *optional*): Can be either: + - an instance of a class derived from :class:`~PretrainedConfig`, + - a string or path valid as input to :py:meth:`~PretrainedConfig.from_pretrained`. + + Configuration for the model to use instead of an automatically loaded configuration. Configuration can + be automatically loaded when: + + - The model is a model provided by the library (loaded with the *model id* string of a pretrained + model). + - The model was saved using :py:meth:`~PreTrainedModel.save_pretrained` and is reloaded by supplying the + save directory. + - The model is loaded by supplying a local directory as ``pretrained_model_name_or_path`` and a + configuration JSON file named *config.json* is found in the directory. + state_dict (Dict[:obj:`str`, :obj:`torch.Tensor`], *optional*): + A state dictionary to use instead of a state dictionary loaded from saved weights file. + This option can be used if you want to create a model from a pretrained configuration but load your own + weights. In this case though, you should check if using :py:meth:`~PreTrainedModel.save_pretrained` and + :py:meth:`~PreTrainedModel.from_pretrained` is not a simpler option. + cache_dir (:obj:`Union[str, os.PathLike]`, *optional*): + Path to a directory in which a downloaded pretrained model configuration should be cached if the + standard cache should not be used. + force_download (:obj:`bool`, *optional*, defaults to :obj:`False`): + Whether or not to force the (re-)download of the model weights and configuration files, overriding the + cached versions if they exist. + resume_download (:obj:`bool`, *optional*, defaults to :obj:`False`): + Whether or not to delete incompletely received files. Will attempt to resume the download if such a + file exists. + proxies (:obj:`Dict[str, str]`, *optional*): + A dictionary of proxy servers to use by protocol or endpoint, e.g., `{'http': 'foo.bar:3128', + 'http://hostname': 'foo.bar:4012'}`. The proxies are used on each request. + local_files_only(:obj:`bool`, *optional*, defaults to :obj:`False`): + Whether or not to only look at local files (i.e., do not try to download the model). + use_auth_token (:obj:`str` or *bool*, *optional*): + The token to use as HTTP bearer authorization for remote files. If :obj:`True`, will use the token generated + when running ``transformers-cli login`` (stored in ``~/.huggingface``). + revision(:obj:`str`, *optional*, defaults to ``"main"``): + The specific model version to use. It can be a branch name, a tag name, or a commit id, since we use a + git-based system for storing models and other artifacts on huggingface.co, so ``revision`` can be any + identifier allowed by git. + mirror(:obj:`str`, *optional*): + Mirror source to accelerate downloads in China. If you are from China and have an accessibility + problem, you can set this option to resolve it. Note that we do not guarantee the timeliness or safety. + Please refer to the mirror site for more information. + torch_dtype (:obj:`str` or :obj:`torch.dtype`, *optional*): + Override the default :obj:`torch.dtype` and load the model under this dtype. 
If ``"auto"`` is passed the dtype + will be automatically derived from the model's weights. + + .. warning:: + + This feature is inherited from HuggingFace. We do not guarantee its usefulness currently. + One should only disable *_fast_init* to ensure backwards compatibility with `transformers.__version__ < + 4.6.0` for seeded model initialization. This argument will be removed at the next major version. See + `pull request 11471 `_ for more information. + kwargs (remaining dictionary of keyword arguments, *optional*): + Can be used to update the configuration object (after it being loaded) and initiate the model (e.g., + ``output_attentions=True``). Behaves differently depending on whether a ``config`` is provided or + automatically loaded: + + - If a configuration is provided with ``config``, ``**kwargs`` will be directly passed to the + underlying model's ``__init__`` method (we assume all relevant updates to the configuration have + already been done) + - If a configuration is not provided, ``kwargs`` will be first passed to the configuration class + initialization function (:py:meth:`~PretrainedConfig.from_pretrained`). Each key of ``kwargs`` that + corresponds to a configuration attribute will be used to override said attribute with the + supplied ``kwargs`` value. Remaining keys that do not correspond to any configuration attribute + will be passed to the underlying model's ``__init__`` function. + + .. tip:: + Passing ``use_auth_token=True`` is required when you want to use a private model. + + .. code-block:: python + + from transformers import AutoModelForSeq2SeqLM + t5 = AutoModelForSeq2SeqLM.from_pretrained("t5-base") + from opendelta import AutoDeltaModel + delta = AutoDeltaModel.from_finetuned("DeltaHub/lora_t5-base_mrpc", backbone_model=t5) + delta.log() + + + + """ + config = kwargs.pop("config", None) + state_dict = kwargs.pop("state_dict", None) + cache_dir = kwargs.pop("cache_dir", None) + + # ignore_mismatched_sizes = kwargs.pop("ignore_mismatched_sizes", False) + force_download = kwargs.pop("force_download", False) + resume_download = kwargs.pop("resume_download", False) + proxies = kwargs.pop("proxies", None) + # output_loading_info = kwargs.pop("output_loading_info", False) + local_files_only = kwargs.pop("local_files_only", False) + use_auth_token = kwargs.pop("use_auth_token", None) + revision = kwargs.pop("revision", None) + mirror = kwargs.pop("mirror", None) + from_pipeline = kwargs.pop("_from_pipeline", None) + from_auto_class = kwargs.pop("_from_auto", False) + # _fast_init = kwargs.pop("_fast_init", True) + torch_dtype = kwargs.pop("torch_dtype", None) + # low_cpu_mem_usage = kwargs.pop("low_cpu_mem_usage", False) + + user_agent = {"file_type": "model", "framework": "pytorch", "from_auto_class": from_auto_class} + + if is_offline_mode() and not local_files_only: + logger.info("Offline mode: forcing local_files_only=True") + local_files_only = True + + # Load config if we don't provide a configuration + if not isinstance(config, BaseDeltaConfig): + config_path = config if config is not None else finetuned_model_name_or_path + config, model_kwargs = cls.config_class.from_finetuned( + config_path, + cache_dir=cache_dir, + return_unused_kwargs=True, + force_download=force_download, + resume_download=resume_download, + proxies=proxies, + local_files_only=local_files_only, + use_auth_token=use_auth_token, + revision=revision, + _from_auto=from_auto_class, + _from_pipeline=from_pipeline, + **kwargs, + ) + + else: + model_kwargs = kwargs + + # Load model + if 
finetuned_model_name_or_path is not None: + finetuned_model_name_or_path = str(finetuned_model_name_or_path) + if os.path.isdir(finetuned_model_name_or_path): + if os.path.isfile(os.path.join(finetuned_model_name_or_path, WEIGHTS_NAME)): + # Load from a PyTorch checkpoint + archive_file = os.path.join(finetuned_model_name_or_path, WEIGHTS_NAME) + else: + raise EnvironmentError( + f"Error no file named {WEIGHTS_NAME} found in " + f"directory {finetuned_model_name_or_path}." + ) + elif os.path.isfile(finetuned_model_name_or_path) or is_remote_url(finetuned_model_name_or_path): + archive_file = finetuned_model_name_or_path + else: + archive_file = hf_bucket_url( + finetuned_model_name_or_path, + filename=WEIGHTS_NAME, + revision=revision, + mirror=mirror, + ) + + try: + # Load from URL or cache if already cached #TODO + + resolved_archive_file = cached_path( + archive_file, + cache_dir=cache_dir, + force_download=force_download, + proxies=proxies, + resume_download=resume_download, + local_files_only=local_files_only, + use_auth_token=use_auth_token, + user_agent=user_agent, + ) + except EnvironmentError as err: + logger.error(err) + msg = ( + f"Can't load weights for '{finetuned_model_name_or_path}'. Make sure that:\n\n" + ) + + if revision is not None: + msg += f"- or '{revision}' is a valid git identifier (branch name, a tag name, or a commit id) that exists for this model name as listed on its model page on 'https://huggingface.co/models'\n\n" + + raise EnvironmentError(msg) + + if resolved_archive_file == archive_file: + logger.info(f"loading weights file {archive_file}") + else: + logger.info(f"loading weights file {archive_file} from cache at {resolved_archive_file}") + else: + resolved_archive_file = None + + # load pt weights early so that we know which dtype to init the model under + + if state_dict is None: + try: + state_dict = torch.load(resolved_archive_file, map_location="cpu") + except Exception as e: + try: + with open(resolved_archive_file) as f: + if f.read().startswith("version"): + raise OSError( + "You seem to have cloned a repository without having git-lfs installed. Please install " + "git-lfs and run `git lfs install` followed by `git lfs pull` in the folder " + "you cloned." + ) + else: + raise ValueError from e + except (UnicodeDecodeError, ValueError): + raise OSError( + f"Unable to load weights from pytorch checkpoint file for '{finetuned_model_name_or_path}' " + f"at '{resolved_archive_file}'. " + "If you tried to load a PyTorch model from a TF 2.0 checkpoint, please set from_tf=True." + ) + + # set dtype to instantiate the model under: + # 1. If torch_dtype is not None, we use that dtype + # 2. If torch_dtype is "auto", we auto-detect dtype from the loaded state_dict, by checking its first + # weights entry - we assume all weights are of the same dtype + # we also may have config.torch_dtype available, but we won't rely on it till v5 + dtype_orig = None + if torch_dtype is not None: + if isinstance(torch_dtype, str): + if torch_dtype == "auto": + torch_dtype = next(iter(state_dict.values())).dtype + else: + raise ValueError( + f"`torch_dtype` can be either a `torch.dtype` or `auto`, but received {torch_dtype}" + ) + dtype_orig = cls._set_default_torch_dtype(torch_dtype) + + + # Initialize the model from config and attach the delta model to the backbone_model. + delta_model = cls.from_config(config, backbone_model, *model_args, **model_kwargs, ) + + # load the state_dict into the backbone_model. 
As the delta model's parameters
+        # are the same objects as the deltas in the backbone model (only the reference
+        # names differ), the state_dict is thereby loaded into the delta model as well.
+        delta_model._load_state_dict_into_backbone(backbone_model, state_dict)
+
+        backbone_hash = gen_model_hash(backbone_model)
+        if check_hash and hasattr(config, "backbone_hash") and \
+                  config.backbone_hash is not None and \
+                  config.backbone_hash != backbone_hash:
+            logger.warning("The config records a hash of the backbone model, and it "
+                            "differs from the hash of the backbone model you loaded. This "
+                            "indicates a mismatch between the backbone that the delta checkpoint "
+                            "was trained on and the one you are using. You probably need to "
+                            "retrain the delta model instead of directly running inference with it.")
+
+        # Set model in evaluation mode to deactivate DropOut modules by default
+        backbone_model.eval()
+
+        return delta_model
+
diff --git a/opendelta/utils/signature.py b/opendelta/utils/signature.py
new file mode 100644
index 0000000..b559f92
--- /dev/null
+++ b/opendelta/utils/signature.py
@@ -0,0 +1,54 @@
+import inspect
+from collections import namedtuple
+
+def signature(f):
+    r"""Get the input arguments of function ``f``. A useful gadget
+    when some function slot might be instantiated into multiple functions.
+
+    Args:
+        f (:obj:`function`) : the function to get the input arguments of.
+
+    Returns:
+        namedtuple : ``(args, defaults, varargs, keywords)``, in that order.
+
+    """
+    sig = inspect.signature(f)
+    args = [
+        p.name for p in sig.parameters.values()
+        if p.kind == inspect.Parameter.POSITIONAL_OR_KEYWORD
+    ]
+    varargs = [
+        p.name for p in sig.parameters.values()
+        if p.kind == inspect.Parameter.VAR_POSITIONAL
+    ]
+    varargs = varargs[0] if varargs else None
+    keywords = [
+        p.name for p in sig.parameters.values()
+        if p.kind == inspect.Parameter.VAR_KEYWORD
+    ]
+    keywords = keywords[0] if keywords else None
+    defaults = [
+        p.default for p in sig.parameters.values()
+        if p.kind == inspect.Parameter.POSITIONAL_OR_KEYWORD
+        and p.default is not p.empty
+    ] or None
+    argspec = namedtuple('Signature', ['args', 'defaults',
+        'varargs', 'keywords'])
+    return argspec(args, defaults, varargs, keywords)
+
+def get_arg_names(f):
+    r""" Get a function's argument names, with the ``self`` argument removed.
+    """
+    args = signature(f).args
+    if args[0] == "self":
+        args = args[1:]
+    return args
+
+
+def get_arg_names_inside_func(func):
+    r""" Get a function's argument names from inside the function itself, with the ``self`` argument removed.
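+
+    A minimal sketch of the intended behaviour (the class ``Foo`` below is purely
+    hypothetical and only serves as an illustration):
+
+    .. code-block:: python
+
+        class Foo:
+            def bar(self, x, y=1):
+                return get_arg_names_inside_func(Foo.bar)
+
+        Foo().bar(0)   # -> ('x', 'y')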
+ """ + arg_names = func.__code__.co_varnames[: func.__code__.co_argcount] + if arg_names[0] == "self": + arg_names = arg_names[1:] + return arg_names \ No newline at end of file diff --git a/opendelta/utils/structure_mapping.py b/opendelta/utils/structure_mapping.py new file mode 100644 index 0000000..bb1f1de --- /dev/null +++ b/opendelta/utils/structure_mapping.py @@ -0,0 +1,349 @@ +from typing import OrderedDict +import copy +import opendelta.utils.logging as logging +from opendelta.utils.visualization import Visualization +logger = logging.get_logger(__name__) +t5_mapping = { + "shared": {"__name__":"embeddings"}, + "encoder": {"__name__":"encoder", + "embed_tokens": {"__name__":"embeddings"}, + "block": {"__name__":"block", + "$": {"__name__":"$", + "layer.0": {"__name__":"attn", + "SelfAttention.q": {"__name__":"q"}, + "SelfAttention.k": {"__name__":"k"}, + "SelfAttention.v": {"__name__":"v"}, + "SelfAttention.o": {"__name__":"proj"}, + "SelfAttention.relative_attention_bias": {"__name__":""}, + "layer_norm": {"__name__":"layer_norm"}, + }, + "layer.1": {"__name__":"ff", + "DenseReluDense.wi": {"__name__":"w1"}, + "layer_norm": {"__name__":"layer_norm"}, + "DenseReluDense.wo": {"__name__":"w2"}, + } + } + }, + "final_layer_norm": {"__name__":"layer_norm"}, + }, + "decoder": {"__name__":"decoder", + "embed_tokens": {"__name__":"embeddings"}, + "block": {"__name__":"block", + "$": {"__name__":"$", + "layer.0": {"__name__":"attn", + "SelfAttention.q": {"__name__":"q"}, + "SelfAttention.k": {"__name__":"k"}, + "SelfAttention.v": {"__name__":"v"}, + "SelfAttention.o": {"__name__":"proj"}, + "SelfAttention.relative_attention_bias": {"__name__":""}, + "layer_norm": {"__name__":"layer_norm"}, + }, + "layer.1": {"__name__":"crossattn", + "EncDecAttention.q": {"__name__":"q"}, + "EncDecAttention.k": {"__name__":"k"}, + "EncDecAttention.v": {"__name__":"v"}, + "EncDecAttention.o": {"__name__":"proj"}, + "layer_norm": {"__name__":"layer_norm"}, + }, + "layer.2": {"__name__":"ff", + "DenseReluDense.wi": {"__name__":"w1"}, + "layer_norm": {"__name__":"layer_norm"}, + "DenseReluDense.wo": {"__name__":"w2"}, + } + } + }, + "final_layer_norm": {"__name__":"layer_norm"}, + } +} + + +roberta_mapping = { + "roberta.embeddings.word_embeddings": {"__name__":"embeddings"}, + "roberta.embeddings.position_embeddings": {"__name__":""}, + "roberta.embeddings.token_type_embeddings": {"__name__":""}, + "roberta.embeddings.LayerNorm": {"__name__":""}, + "roberta.encoder": {"__name__":"encoder", + "layer": {"__name__":"block", + "$": {"__name__":"$", + "attention": {"__name__":"attn", + "self.query": {"__name__":"q"}, + "self.key": {"__name__":"k"}, + "self.value": {"__name__":"v"}, + "output.dense": {"__name__":"proj"}, + "output.LayerNorm": {"__name__":"layer_norm"}, + }, + "output": {"__name__":"ff", + "dense": {"__name__":"w2"}, + "LayerNorm": {"__name__":"layer_norm"} + }, + "intermediate.dense": {"__name__":"ff.w1"}, + } + } + }, + "lm_head": {"__name__":"lm_head", + "dense": {"__name__":""}, + "layer_norm": {"__name__":""}, + "decoder": {"__name__":"proj"}, + }, +} + + + +bert_mapping = { + "bert.embeddings.word_embeddings": {"__name__":"embeddings"}, + "bert.embeddings.position_embeddings": {"__name__":""}, + "bert.embeddings.token_type_embeddings": {"__name__":""}, + "bert.embeddings.LayerNorm": {"__name__":""}, + "bert.encoder": {"__name__":"encoder", + "layer": {"__name__":"block", + "$": {"__name__":"$", + "attention": {"__name__":"attn", + "self.query": {"__name__":"q"}, + "self.key": 
{"__name__":"k"}, + "self.value": {"__name__":"v"}, + "output.dense": {"__name__":"proj"}, + "output.LayerNorm": {"__name__":"layer_norm"}, + }, + "output": {"__name__":"ff", + "dense": {"__name__":"w2"}, + "LayerNorm": {"__name__":"layer_norm"} + }, + "intermediate.dense": {"__name__":"ff.w1"}, + } + } + }, + "cls.predictions": {"__name__": "lm_head", + "transform.dense": {"__name__":""}, + "transform.LayerNorm": {"__name__":""}, + "decoder": {"__name__":"proj"}, + } +} + +debertav2_mapping = { + "deberta.embeddings.word_embeddings": {"__name__":"embeddings"}, + "deberta.embeddings.LayerNorm": {"__name__":""}, + "deberta.encoder": {"__name__":"encoder", + "layer": {"__name__":"block", + "$": {"__name__":"$", + "attention": {"__name__":"attn", + "self.query_proj": {"__name__":"q"}, + "self.key_proj": {"__name__":"k"}, + "self.value_proj": {"__name__":"v"}, + "output.dense": {"__name__":"proj"}, + "output.LayerNorm": {"__name__":"layer_norm"}, + }, + "output": {"__name__":"ff", + "dense": {"__name__":"w2"}, + "LayerNorm": {"__name__":"layer_norm"} + }, + "intermediate.dense": {"__name__":"ff.w1"}, + } + }, + "rel_embeddings": {"__name__": ""}, + "LayerNorm": {"__name__": ""}, + "conv": {"__name__": "", + "conv": {"__name__": ""}, + "LayerNorm": {"__name__": ""} + } + }, + "lm_predictions.lm_head": {"__name__":"lm_head", + "dense": {"__name__":""}, + "LayerNorm": {"__name__":""}, + "bias": {"__name__": ""} + }, +} + +gpt2_mapping = { + "transformer.wte": {"__name__":"embeddings"}, + "transformer.wpe": {"__name__":""}, + "transformer.h": {"__name__":"decoder.block", + "$": {"__name__":"$", + "attn": {"__name__":"attn", + "c_attn": {"__name__":"q,k,v"}, + "c_proj": {"__name__":"proj"}, + }, + "ln_1": {"__name__":"attn.layer_norm"}, + "mlp":{ "__name__": "ff", + "c_fc": {"__name__":"w1"}, + "c_proj": {"__name__":"w2"} + }, + "ln_2": {"__name__":"ff.layer_norm"}, + }, + }, + "transformer.ln_f": {"__name__":"decoder.layernorm"}, + "lm_head": {"__name__":"lm_head.proj"}, +} + +distilbert_mapping = { + "distilbert.embeddings.word_embeddings": {"__name__":"embeddings"}, + "distilbert.embeddings.position_embeddings": {"__name__":""}, + "distilbert.embeddings.token_type_embeddings": {"__name__":""}, + "distilbert.embeddings.LayerNorm": {"__name__":""}, + "distilbert.transformer": {"__name__":"encoder", + "layer": {"__name__":"block", + "$": {"__name__":"$", + "attention": {"__name__":"attn", + "q_lin": {"__name__":"q"}, + "k_lin": {"__name__":"k"}, + "v_lin": {"__name__":"v"}, + "out_lin": {"__name__":"proj"}, + }, + "ffn": {"__name__":"ff", + "lin1": {"__name__":"w1"}, + "lin2": {"__name__":"w2"}, + }, + "sa_layer_norm": {"__name__":"attn.layer_norm"}, + "output_layer_norm":{"__name__": "ff.layer_norm"} + } + } + } +} + +def transform(org_key, mapping, strict=True, warning=False, verbose=False): + + chain = org_key.split(".") + query = "" + node = mapping + + new_chain = [] + for elem in chain: + query += elem + if query in node: + node = node[query] + new_elem = node["__name__"] + if new_elem == "": + if strict: + if warning: + print(f"'{org_key}' has no common mapping.") + return + else: + new_chain.append(query) + else: + new_chain.append(new_elem) + query = "" + elif "$" in node: + node = node["$"] + new_chain.append(query) + query = "" + else: + query += "." 
+ if query!="": + if strict: + if warning: + print("A part of the orginial key hasn't been matched!") + return + else: + new_chain.append(query.strip(".")) # tailing query + new_key = ".".join(new_chain) + if verbose: + print(f"{org_key} => {new_key}") + return new_key + + + + +def mapping_for_SequenceClassification(mapping, type): + mapping = copy.deepcopy(mapping) + if type == "roberta": + mapping.pop("lm_head") + mapping['classifier'] = {"__name__":"classifier", + "dense": {"__name__": "dense"}, + "out_proj": {"__name__":"out_proj"} + } + elif type == "bert": + mapping.pop("lm_head") + mapping["classifier"] = {"__name__": "classifier"} + elif type == "deberta": + mapping.pop("lm_predictions.lm_head") + mapping["pooler"] = {"__name__": "classifier"} + mapping["classifier"] = {"__name__": "classifier"} + else: + raise NotImplementedError + return mapping + +def mapping_for_ConditionalGeneration(mapping, type): + mapping = copy.deepcopy(mapping) + if type == "t5": + mapping["lm_head"] = {"__name__":"lm_head.proj"} + else: + raise NotImplementedError + return mapping + +class _LazyLoading(OrderedDict): + def __init__(self, mapping): + self._mapping_string = mapping + self._mapping = {} + + def __getitem__(self, key): + if key not in self._mapping_string: + raise KeyError(key) + value = self._mapping_string[key] + self._mapping[key] = eval(value) + return self._mapping[key] + + def keys(self): + return list(self._mapping_string.keys()) + + def __contains__(self, item): + + return item in self._mapping_string + + +class CommonStructureMap(object): + r""" A lazy loading structure map. + """ + Mappings = _LazyLoading({ + "RobertaForSequenceClassification": """mapping_for_SequenceClassification(roberta_mapping, "roberta")""", + "RobertaForMaskedLM": "roberta_mapping", + "BertForMaskedLM": "bert_mapping", + "BertForSequenceClassification": """mapping_for_SequenceClassification(bert_mapping, "bert")""", + "T5ForConditionalGeneration": """mapping_for_ConditionalGeneration(t5_mapping, "t5")""", + "DebertaV2ForSequenceClassification": """mapping_for_SequenceClassification(debertav2_mapping, "deberta")""" + }) + + SpecialModelInverseMaps = { + } + def __init__(self, mapping): + if not isinstance(mapping, dict): + raise TypeError(f"Initial a {CommonStructureMap.__name__} using a non-dict object. 
Consider using `load` instead.") + self.mapping = mapping + + + @classmethod + def load(cls, backbone_model, strict=True, warining=False, visualize=True): + r"""Doc + """ + backbone_class = type(backbone_model).__name__ + if backbone_class not in cls.Mappings: + raise KeyError(backbone_class) + mapping = cls.Mappings[backbone_class] + if visualize: + logger.info("Since you are using the common structure mapping, draw the transformed parameter structure for checking.") + vis = Visualization(backbone_model) + vis.structure_graph(common_structure=True, mapping=mapping) + return cls(mapping) + + def __repr__(self,): + return self.mapping + + + def transform(self, org_key, strict=True, warning=False): + return transform(org_key, self.mapping, strict, warning) + + + +if __name__ == "__main__": + from openprompt.plms import load_plm + import argparse + parser = argparse.ArgumentParser("") + parser.add_argument("--model", type=str, default='t5-lm', help="We test both t5 and t5-lm in this scripts, the corresponding tokenizerwrapper will be automatically loaded.") + parser.add_argument("--model_name_or_path", default="t5-base-lm-adapt") + parser.add_argument("--cache_base", default='/home/hushengding/plm_cache/') + parser.add_argument("--keep_non_params", action="store_true") + parser.add_argument("--expand_params", action="store_true") + args = parser.parse_args() + plm, tokenizer, model_config, WrapperClass = load_plm(args.model, args.cache_base+args.model_name_or_path) + + for name, _ in plm.named_modules(): + transform(name, t5_mapping, strict=True, warning=False) + \ No newline at end of file diff --git a/opendelta/utils/visualization.py b/opendelta/utils/visualization.py new file mode 100644 index 0000000..bbe5b12 --- /dev/null +++ b/opendelta/utils/visualization.py @@ -0,0 +1,438 @@ +from typing import List +from rich.tree import Tree as RichTree +from rich import print as richprint +import torch +import torch.nn as nn +import re +from collections import OrderedDict +import opendelta.utils.logging as logging +logger = logging.get_logger(__name__) +class ModuleTree(RichTree): + def __init__( + self, + module_name=None, + info=None, + is_param_node=False, + type_color="green", + param_color="red", + main_color="white", + style = "tree", + guide_style = "tree.line", + expanded=True, + highlight=False, + ): + self.module_name = module_name + self.info = info + self.is_param_node = is_param_node + self.type_color = type_color + self.param_color = param_color + self.main_color = main_color + label = self.set_label() + super().__init__(label,style=style,guide_style=guide_style,expanded=expanded,highlight=highlight) + + + def add( + self, + module_name=None, + info=None, + is_param_node=False, + type_color="green", + param_color="red", + main_color="white", + style=None, + guide_style=None, + expanded=True, + highlight=False, + ): + node = ModuleTree( + module_name, + info, + is_param_node, + type_color, + param_color, + main_color, + style=self.style if style is None else style, + guide_style=self.guide_style if guide_style is None else guide_style, + expanded=expanded, + highlight=self.highlight if highlight is None else highlight, + ) + self.children.append(node) + return node + + def set_label(self): + if self.module_name is not None: + label = f"[{self.main_color}]{self.module_name}" + else: + label = "" + if self.info is not None: + if not self.is_param_node: + label += f" [{self.type_color}]({self.info})" + else: + label += f" [{self.param_color}]{self.info}" + self.label = label + return 
label
+
+
+class Visualization(object):
+    r"""
+    A visualization tool for *big* pretrained models.
+
+    - Better representation of repeated blocks
+    - Clearer parameter positions
+    - Visible parameter states
+
+    Args:
+        plm (:obj:`torch.nn.Module`): The pretrained model; in fact, any PyTorch module can be visualized.
+
+    """
+    def __init__(self, plm: nn.Module):
+
+        self.plm = plm
+        self.type_color = "green"
+        self.param_color = "cyan"
+        self.duplicate_color = "red"
+        self.normal_color = "white"
+        self.virtual_color = "orange"
+        self.not_common_color = "bright_black"
+        self.no_grad_color = "rgb(0,70,100)"
+        self.delta_color = "rgb(175,0,255)"
+
+    def check_mode(self):
+        if self.keep_non_params and self.common_structure:
+            raise RuntimeError("keep_non_params can't be used with common_structure. The common structure only contains parameter nodes.")
+        if self.common_structure:
+            if self.mapping is None:
+                raise RuntimeError("Mapping hasn't been given.")
+
+    def structure_graph(self,
+                        rootname="root",
+                        expand_params=False,
+                        keep_non_params=False,
+                        common_structure=False,
+                        mapping=None,
+                        only_common=False,
+                        printTree=True,
+                        ):
+        r"""Draw the structure graph in the command line.
+
+        Args:
+            rootname (:obj:`str`): The root node's name.
+            keep_non_params (:obj:`bool`): Display the modules that do not have parameters, such as nn.Dropout.
+            expand_params (:obj:`bool`): Display parameter information (shape, etc.) on separate lines.
+            common_structure (:obj:`bool`): Whether to convert the structure into the common structure defined in structure_mapping.py. Modules outside the common structure are displayed in grey.
+            only_common (:obj:`bool`): Whether to ignore the modules that are not in the common structure. This results in a more compact view. Defaults to False.
+            mapping (:obj:`dict`): The structure mapping. Must be provided if common_structure=True.
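+
+        A minimal usage sketch (the backbone below is only an example; any
+        :obj:`nn.Module` can be visualized):
+
+        .. code-block:: python
+
+            from transformers import BertModel
+            from opendelta.utils.visualization import Visualization
+
+            backbone = BertModel.from_pretrained("bert-base-uncased")
+            vis = Visualization(backbone)
+            vis.structure_graph(rootname="bert", expand_params=False)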
+ """ + + self.keep_non_params = keep_non_params + self.expand_params = expand_params + self.rootname = rootname + self.only_common = only_common + self.common_structure = common_structure + self.mapping = mapping + self.check_mode() + # root_tree = self.build_tree(rootname) + self.root_tree = ModuleTree(self.rootname) + if common_structure: + self.build_common_tree(self.plm, mapping, self.root_tree) + else: + self.build_tree(self.plm, self.root_tree) + self.prune_tree(self.root_tree) + if not self.expand_params: + self.fold_param_node(self.root_tree) + if printTree: + richprint(self.root_tree) + return self.root_tree + + + + + def is_leaf_module(self, module): + r"""[NODOC] Whether the module is a leaf module + """ + return len([n for n,_ in module.named_children()]) == 0 + + def build_tree(self, module:nn.Module, tree:ModuleTree=None): + r"""[NODOC] build the originial tree structure + """ + if self.is_leaf_module(module): + return + else: + for n,m in module.named_children(): + type_info = re.search(r'(?<=\').*(?=\')', str(type(m))).group() + type_info = type_info.split(".")[-1] + newnode = tree.add(n, info=type_info, type_color=self.type_color) + self.add_param_info_node(m, newnode) + self.build_tree(module=m, tree=newnode) + + def has_parameter(self, module): + return len([p for p in module.parameters()])>0 + + + def build_common_tree(self, module:nn.Module, mapping, tree:ModuleTree=None, query="", key_to_root=""): + r""" (Unstable) build the common tree structure + """ + if self.is_leaf_module(module): + if len(query)>0: # the field is not in mapping + if self.has_parameter(module): + # from IPython import embed + # embed(header = "in leaf") + logger.warning(f"Parameter node {query} not found under tree {tree.module_name} and module {module}. Is your mapping correct?") # WARNING + return + else: + for n,m in module.named_children(): + new_query = query+n + type_info = re.search(r'(?<=\').*(?=\')', str(type(m))).group() + type_info = type_info.split(".")[-1] + if new_query in mapping or "$" in mapping: + # print("query",new_query) + # from IPython import embed + # embed() + if new_query in mapping: + new_mapping = mapping[new_query] + name = new_mapping["__name__"] + if len(name.split(".")) > 1: # new key contains a hierarchy , then unfold the hierarchy. + # insert virtual node + hierachical_name = name.split(".") + temp_tree = self.find_or_insert(tree, hierachical_name) + newnode = temp_tree.add(hierachical_name[-1], info=type_info, type_color=self.type_color) + elif name=="": # the key not in a predefined common structure + if self.only_common: + continue + else: # add the originial name into the tree + newnode = tree.add(new_query, info=type_info, main_color=self.not_common_color, type_color=self.not_common_color) + else: # a single new key + newnode = self.find_not_insert(tree, [name,""]) # try to find the node + if newnode is not None: + newnode.info = type_info + newnode.type_color = self.type_color + newnode.set_label() + else: + newnode = tree.add(name, info=type_info, type_color=self.type_color) + elif "$" in mapping: # match any thing in the field. 
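+                    # "$" acts as a wildcard for repeated children such as numeric block
+                    # indices ("0", "1", ...): any child name falls through this branch and
+                    # keeps its original name in the common tree.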
+ new_mapping = mapping["$"] + newnode = tree.add(n, info=type_info, type_color=self.type_color) + self.add_param_info_node(m, newnode) + self.build_common_tree(module=m, tree=newnode, mapping=new_mapping, key_to_root=key_to_root+"."+new_query) + else: + # try to find from root + # trsf_key = transform(key_to_root.strip("."), self.mapping) + # parent_node = self.find_not_insert(self.root_tree, trsf_key.split(".")+[""]) + # if parent_node is not None: + # new_mapping = mapping[new_query] + # newnode = parent_node.add(name, info=type_info, type_color=self.type_color) + # self.build_common_tree(module=m, tree=parent_node, mapping ) + # print("notin query",new_query) + # if new_query == "dense": + # from IPython import embed + # embed() + # print(f"::{query},,{new_query}, {list(mapping.keys())}") + new_query += "." + self.build_common_tree(module=m, tree=tree, mapping=mapping, query=new_query, key_to_root=key_to_root) + + + + def find_or_insert(self, tree:ModuleTree, hierachical_name:List[str] ): + r"""[NODOC] Find the node, if not find, insert a virtual node + """ + if len(hierachical_name)==1: + return tree + names = [x.module_name for x in tree.children] + if hierachical_name[0] not in names: + new_node = tree.add(hierachical_name[0], info="Virtual", type_color=self.virtual_color) + else: + for x in tree.children: + if x.module_name == hierachical_name[0]: + new_node = x + break + return self.find_or_insert(new_node, hierachical_name=hierachical_name[1:]) + + def find_not_insert(self, tree:ModuleTree, hierachical_name:List[str] ): + r"""[NODOC] Find the node but not insert + """ + if len(hierachical_name)==1: + return tree + names = [x.module_name for x in tree.children] + if hierachical_name[0] not in names: + return None + else: + for x in tree.children: + if x.module_name == hierachical_name[0]: + new_node = x + break + return self.find_not_insert(new_node, hierachical_name=hierachical_name[1:]) + + + + def fold_param_node(self, t: ModuleTree, p:ModuleTree=None): + r"""[NODOC] place the parameters' infomation node right after the module that contains the parameters. + E.g. w1 (Linear) + -- weight: [32128, 1024] + => + w1 (Linear) weight: [32128, 1024] + + """ + if hasattr(t,"is_param_node") and t.is_param_node: + p.label += t.label + return True # indicate whether should be removed + elif len(t.children) == 0: + if self.keep_non_params: + return False + else: + return True + else: + rm_idx = [] + for idx, c in enumerate(t.children): + if self.fold_param_node(t=c, p=t): + rm_idx.append(idx) + t.children = [t.children[i] for i in range(len(t.children)) if i not in rm_idx] + return False + + def prune_tree(self, t: ModuleTree): + r"""[NODOC] Calculate the _finger_print of a module as the _finger_print of all child node plus the _finger_print of itself. + The leaf node will have the _finger_print == label. + Merge the different node that as the same _finger_print into a single node. 
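+        For example, sibling transformer blocks whose subtrees are structurally identical
+        (say blocks ``1`` ... ``23`` of a T5 encoder) are collapsed into a single child
+        whose label reads roughly ``1-23 (T5Block)``; see :meth:`extract_common_and_join`
+        and :meth:`neat_expr`.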
+ """ + if len(t.children) == 0: + setattr(t, "_finger_print", t.label) + return + + for idx, sub_tree in enumerate(t.children): + self.prune_tree(sub_tree) + + t_finger_print = t.label +"::"+";".join([x._finger_print for x in t.children]) + setattr(t, "_finger_print", t_finger_print) + + nohead_finger_print_dict = OrderedDict() + for child_id, sub_tree in enumerate(t.children): + fname_list = sub_tree._finger_print.split("::") + if len(fname_list)==1: + fname = fname_list[0] + else: + fname = "::".join(fname_list[1:]) + if fname not in nohead_finger_print_dict: + nohead_finger_print_dict[fname] = [child_id] + else: + nohead_finger_print_dict[fname].append(child_id) + + new_childrens = [] + for groupname in nohead_finger_print_dict: + representative_id = nohead_finger_print_dict[groupname][0] + representative = t.children[representative_id] + group_node = [t.children[idx] for idx in nohead_finger_print_dict[groupname]] + + representative = self.extract_common_and_join(group_node) + new_childrens.append(representative) + t.children = new_childrens + + + def extract_common_and_join(self, l:List[ModuleTree]): + r"""[NODOC] Some modules that have the same info (e.g., are all "Linear") have different names (e.g., w1,w2) + Merge them. + E.g. tree1.module_name = "w1", tree1.info = "Linear"; tree2.module_name = "w1", tree2.info = "Linear" + -> representive.module_name = "w1,w2", representive.info = "Linear" + """ + representative = l[0] + if len(l)==1: + return representative + name_list = [x.module_name for x in l] + info_list = [x.info for x in l] + type_hint_dict = OrderedDict() + for x, y in zip(name_list, info_list): + if y not in type_hint_dict: + type_hint_dict[y] = [x] + else: + type_hint_dict[y].append(x) + + s = "" + names = "" + typeinfos = "" + for t in type_hint_dict: + group_components = type_hint_dict[t] + group_components = self.neat_expr(group_components) + names += group_components+"," + typeinfos += t+"," + s += f"[{self.duplicate_color}]{group_components}[{self.type_color}]({t})" + s += f"," + names = names[:-1] + s = s[:-1] + typeinfos = typeinfos[:-1] + representative.module_name = names + representative.type_info = typeinfos + representative.label = s + return representative + + def neat_expr(self, l:List[str]): + r"""[NODOC] A small tool function to arrange the consecutive number into interval display. + E.g., ["1","2","3","5","6","9","10","11","12"] -> ["1-3","5-6","9-12"] + """ + try: + s = self.ranges([int(x.strip()) for x in l]) + s = [str(x)+"-"+str(y) for x,y in s] + return ",".join(s) + except: + return ",".join(l) + + def ranges(self, nums:List[int]): + r"""[NODOC] A small tool function to arrange the consecutive number into interval display. + E.g., [1,2,3,5,6,9,10,11,12] -> [[1,3],[5,6],[9,12]] + """ + nums = sorted(set(nums)) + gaps = [[s, e] for s, e in zip(nums, nums[1:]) if s+1 < e] + edges = iter(nums[:1] + sum(gaps, []) + nums[-1:]) + return list(zip(edges, edges)) + + def add_param_info_node(self, m:nn.Module, tree:ModuleTree, record_grad_state=True, record_delta=True): + r"""[NODOC] Add parameter infomation of the module. The parameters that are not inside a module (i.e., created using nn.Parameter) will be added in this function. 
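+
+        A small sketch of how the node color is chosen (the module below is hypothetical):
+
+        .. code-block:: python
+
+            layer = nn.Linear(4, 4)
+            layer.weight.requires_grad = False   # rendered in the no-grad color
+            layer.bias._is_delta = True          # rendered in the delta color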
+ """ + known_module = [n for n,c in m.named_children()] + for n,p in m.named_parameters(): + if n.split(".")[0] not in known_module: + if len(n.split(".")) > 1: raise RuntimeError(f"The name field {n} should be a parameter since it doesn't appear in named_children, but it contains '.'") + info = "{}:{}".format(n, list(p.shape)) + + if record_grad_state: + if not p.requires_grad: + color = self.no_grad_color + else: + color = self.param_color + else: + color = self.param_color + + if record_delta: + if hasattr(p, "_is_delta") and getattr(p, "_is_delta"): + color = self.delta_color + + tree.add(info=info, is_param_node=True, param_color=color) + + + + + + + +if __name__=="__main__": + # example command line: + # 1. python opendelta/utils/visualization.py --model t5-lm --model_name_or_path t5-large-lm-adapt --common_structure --only_common + # 2. python opendelta/utils/visualization.py --model roberta --model_name_or_path roberta-large --common_structure + # 3. python opendelta/utils/visualization.py --model gpt2 --model_name_or_path gpt2-medium --keep_non_params --expand_params + from openprompt.plms import load_plm + import argparse + parser = argparse.ArgumentParser("") + parser.add_argument("--model", type=str, default='t5-lm', help="We test both t5 and t5-lm in this scripts, the corresponding tokenizerwrapper will be automatically loaded.") + parser.add_argument("--model_name_or_path", default="t5-large-lm-adapt") + parser.add_argument("--cache_base", default='/home/hushengding/plm_cache/') + parser.add_argument("--keep_non_params", action="store_true", help="Display the modules that does not have parameters, such as nn.Dropout") + parser.add_argument("--expand_params", action="store_true", help="Display parameter infomation (shape, etc) in seperate lines. ") + parser.add_argument("--common_structure", action="store_true", help="Whether convert the structure into a common structure defined in structure_mapping.py. The not common structure will be displayed in grey." ) + parser.add_argument("--only_common", action="store_true", help="Whether ignore the modules that are not in common structure. This will result in a more compact view. 
Default to False") + args = parser.parse_args() + plm, tokenizer, model_config, WrapperClass = load_plm(args.model, args.cache_base+args.model_name_or_path) + print("Model Loaded!") + if args.common_structure: + from opendelta.utils.structure_mapping import Mappings + mapping = Mappings[args.model] + else: + mapping = None + visobj = Visualization(plm) + visobj.structure_graph(rootname=args.model_name_or_path, keep_non_params=args.keep_non_params, expand_params=args.expand_params, common_structure=args.common_structure, only_common=args.only_common, mapping=mapping) diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..e5fa365 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,11 @@ +torch>=1.9.0 +transformers==4.10.0 +datasets==1.17.0 +sentencepiece==0.1.96 +tqdm==4.62.2 +openprompt +loralib +decorator +rich +web.py +gitpython diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..a10b156 --- /dev/null +++ b/setup.py @@ -0,0 +1,42 @@ + +import setuptools +import os + +def get_requirements(path): + ret = [] + with open(os.path.join(path, "requirements.txt"), encoding="utf-8") as freq: + for line in freq.readlines(): + ret.append( line.strip() ) + return ret + + +path = os.path.dirname(os.path.abspath(__file__)) +requires = get_requirements(path) +print(requires) + +with open('README.md', 'r') as f: + setuptools.setup( + name = 'opendelta', + version = '0.0.1', + description = "An open source framework for delta learning (parameter efficient learning).", + long_description=open("README.md", "r", encoding="utf-8").read(), + long_description_content_type="text/markdown", + author = '', + author_email = 'shengdinghu@gmail.com', + license="Apache", + url="https://github.com/thunlp/OpenDelta", + keywords = ['PLM', 'Parameter-efficient-Learning', 'AI', 'NLP'], + python_requires=">=3.8.0", + install_requires=requires, + packages=setuptools.find_packages(), + classifiers=[ + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.9", + "Intended Audience :: Developers", + "Intended Audience :: Education", + "Intended Audience :: Science/Research", + "License :: OSI Approved :: Apache Software License", + "Operating System :: OS Independent", + ] + ) \ No newline at end of file