commit 351aa3a40d (parent b3935927dc)

    init
@@ -21,6 +21,7 @@ from .delta_models.adapter import AdapterModel
 from .delta_models.prefix import PrefixModel
 from .delta_models.soft_prompt import SoftPromptModel
 from .delta_models.low_rank_adapter import LowRankAdapterModel
+from .delta_models.split import SplitModel
 from .utils.visualization import Visualization
 
 
@@ -5,7 +5,6 @@ from opendelta.utils.signature import get_arg_names, get_arg_names_inside_func
 from opendelta.utils.name_based_addressing import *
 from opendelta.basemodel import DeltaBase
 from transformers.models.t5 import T5ForConditionalGeneration
-import loralib as lora
 import torch.nn as nn
 from opendelta import BaseDeltaConfig
 import math
@@ -1,4 +1,3 @@
-from examples_prompt.metrics.metrics import exact_match
 from opendelta.utils.signature import get_arg_names, get_arg_names_inside_func
 from opendelta.utils.name_based_addressing import *
 from opendelta.utils.cuda import get_device
@@ -0,0 +1,159 @@
+from functools import partial
+from random import random
+from typing import Optional, Union, List
+
+from opendelta.utils.signature import get_arg_names_inside_func
+from opendelta.utils.name_based_addressing import *
+from opendelta.utils.cuda import get_device
+from opendelta.basemodel import DeltaBase
+import loralib as lora
+import torch.nn as nn
+import torch
+import math
+from opendelta.delta_models.layers.activations import Activations
+import inspect
+from opendelta import BaseDeltaConfig
+import opendelta.utils.logging as logging
+import numpy as np
+from opendelta import global_setting
+from itertools import accumulate
+from opendelta.delta_models.adapter import AdapterLayer
+
+logger = logging.get_logger(__name__)
+
+
+class SplitLayer(nn.Module):
+    r"""A layer that splits each batch at the configured boundaries and routes
+    every sub-batch through its own delta module.
+    """
+    def __init__(self, batch_size: list):
+        super().__init__()
+        # Store cumulative boundaries, e.g. [8, 1, 7] -> [8, 9, 16].
+        self.batch_size = list(accumulate(batch_size))
+        self.modulelist = nn.ModuleList()
+        self.pseudo_inited = False
+
+    def append(self, module):
+        self.modulelist.append(module)
+
+    def post_forward(self, output):
+        if isinstance(output, tuple):
+            hiddens = output[0]
+        elif isinstance(output, torch.Tensor):
+            hiddens = output
+        else:
+            raise TypeError(f"Unexpected output type {type(output)}; expected a tuple or a torch.Tensor.")
+        if hiddens.shape[0] != self.batch_size[-1]:
+            if self.pseudo_inited:
+                raise RuntimeError('The batch size of the input is not consistent with the split config.')
+            # Pseudo-initialization pass: feed the full hidden state through every
+            # sub-module once so that lazily instantiated modules can build their
+            # parameters; only the last output is kept.
+            self.pseudo_inited = True
+            outputs = None
+            for i in range(len(self.batch_size)):
+                outputs = self.modulelist[i].post_forward(
+                    hiddens
+                )
+            merge_output = outputs
+        else:
+            # Slice the batch at the cumulative boundaries, dispatch each
+            # sub-batch to its own delta module, then re-concatenate.
+            split_outputs = [None] * len(self.batch_size)
+            for i in range(len(self.batch_size)):
+                split_outputs[i] = self.modulelist[i].post_forward(
+                    hiddens[(0 if i == 0 else self.batch_size[i-1]):self.batch_size[i]]
+                )
+            merge_output = torch.cat(split_outputs)
+
+        if isinstance(output, tuple):
+            output = (merge_output,) + output[1:]
+        elif isinstance(output, torch.Tensor):
+            output = merge_output
+        else:
+            raise TypeError(f"Unexpected output type {type(output)}.")
+        return output
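+
+# Editor's sketch of the slicing above (hypothetical values, not from the
+# original commit): with batch_size=[8, 1, 7] the cumulative boundaries are
+# [8, 9, 16], so a hidden state of shape (16, seq_len, dim) is dispatched as
+#   modulelist[0].post_forward(hiddens[0:8])
+#   modulelist[1].post_forward(hiddens[8:9])
+#   modulelist[2].post_forward(hiddens[9:16])
+# and the results are concatenated back into a (16, seq_len, dim) tensor.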
+
+
+class SplitConfig(BaseDeltaConfig):
+    r"""
+    This is the configuration class to store the configuration of a :py:class:`~SplitModel`.
+
+    """
+    def __init__(
+        self,
+        batch_size: list = [8, 1, 7],
+        **kwargs
+    ):
+        super().__init__(**kwargs)
+        arg_names = get_arg_names_inside_func(self.__init__)
+        for arg_name in arg_names:
+            if not hasattr(self, arg_name):  # the arg has not been registered in the parent config
+                setattr(self, arg_name, locals()[arg_name])
+
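+
+# Editor's illustration (hypothetical usage, not in the original commit): the
+# config can be built and handed to the model via DeltaBase's from_config
+# machinery, assuming it behaves here as for OpenDelta's other delta models:
+#   config = SplitConfig(batch_size=[8, 8])
+#   delta_model = SplitModel.from_config(config, backbone_model)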
+
+class SplitModel(DeltaBase):
+    r"""A delta model that splits each batch into sub-batches, sized according to
+    ``batch_size``, and routes every sub-batch through its own adapter module
+    (`Parameter-Efficient Transfer Learning for NLP <https://arxiv.org/abs/1902.00751>`_ )
+    attached to the designated ``modified_modules``. The modified modules' output is
+    passed into the split layer's post_forward in a sequential manner.
+
+    .. note::
+        We **assume** the output of the modified module is the hidden state, or a tuple
+        whose first element is the hidden state. This is true for most PLMs. However, we
+        admit that it is currently not rigorous; we will improve it in the next version.
+        For now, if you encounter an error here for your backbone, you can modify the
+        code to extract the hidden state.
+
+    class attributes:
+        - default_modified_modules = ["attn", "ff"] Following the Adapter paper, we
+          attach the delta modules to the attention layer and the feed-forward layer.
+        - delta_type = "adapter"
+
+    Args:
+        backbone_model (:obj:`transformers.PretrainedModels`): The backbone model to be modified.
+        batch_size (:obj:`list`): The sizes of the sub-batches. One delta module is created per
+            entry, and the entries must sum to the batch size seen at run time.
+        modified_modules (:obj:`List[str]`): The modules to attach the split layers to.
+        exclude_modules (:obj:`List[str]`, *optional*, default to :obj:`None`): The modules to
+            exclude from modification.
+        unfrozen_modules (:obj:`List[str]`, *optional*, default to :obj:`None`): The modules that
+            should be unfrozen together with the delta parameters.
+        common_structure (:obj:`bool`): Whether to use name-based addressing with a common
+            structure mapping.
+
+    """
+    config_class = SplitConfig
+    delta_type = "adapter"
+    default_modified_modules = ["attn", "ff"]
+
+    def __init__(self,
+                 backbone_model: nn.Module,
+                 batch_size: list = [8, 1, 7],
+                 modified_modules: Optional[List[str]] = None,
+                 exclude_modules: Optional[List[str]] = None,
+                 unfrozen_modules: Optional[List[str]] = None,
+                 common_structure: Optional[bool] = None,
+                 interactive_modify: Optional[Union[bool, int]] = False,
+                 ):
+        DeltaBase.__init__(self,
+                           backbone_model,
+                           modified_modules=modified_modules,
+                           exclude_modules=exclude_modules,
+                           unfrozen_modules=unfrozen_modules,
+                           common_structure=common_structure,
+                           interactive_modify=interactive_modify,
+                           )
+        arg_names = get_arg_names_inside_func(self.__init__)
+        for arg_name in arg_names:
+            if not hasattr(self, arg_name):  # not registered in the parent class
+                setattr(self, arg_name, locals()[arg_name])
+
+        self.delta_modules = nn.ModuleList()
+
+        self.add_all_delta_to_backbone(self.backbone_model,
+                                       self.modified_modules,
+                                       )
+
+    def update_module(self, module: nn.Module, key: str):
+        _, _, ref = self.find_module(module, key)
+        splitlayer = SplitLayer(self.batch_size)
+        # One adapter-style delta module per sub-batch.
+        for b in self.batch_size:
+            splitlayer.append(self.new_module_like(ref))
+        self.insert_sequential_module(ref, delta_module=splitlayer, delta_name="split")
+
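+    # Editor's note (assumption about the surrounding OpenDelta flow, not stated
+    # in this commit): add_all_delta_to_backbone is expected to call update_module
+    # once per matched module key, and find_module / insert_sequential_module are
+    # DeltaBase helpers that locate the target submodule and hook the SplitLayer's
+    # post_forward after it.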
+    def new_module_like(self, module):
+        module_device = get_device(module)
+        adapterlayer = AdapterLayer()
+        self.delta_modules.append(adapterlayer)
+        return adapterlayer
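+
+# Editor's aside (hypothetical usage sketch, not part of the original commit):
+# the SplitLayer can be exercised standalone, relying on AdapterLayer building
+# its parameters lazily on the first post_forward call:
+#   layer = SplitLayer(batch_size=[2, 2])      # cumulative boundaries: [2, 4]
+#   layer.append(AdapterLayer()); layer.append(AdapterLayer())
+#   h = torch.randn(4, 16, 768)                # (batch, seq_len, hidden_dim)
+#   out = layer.post_forward(h)                # rows 0:2 and 2:4 use different adapters
+#   assert out.shape == h.shape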
@@ -0,0 +1,15 @@
+from transformers import BertModel
+model = BertModel.from_pretrained("bert-base-cased")
+from opendelta import Visualization
+Visualization(model).structure_graph()
+from opendelta import SplitModel
+delta_model = SplitModel(model, batch_size=[1]*16, modified_modules=['output.dense'])
+delta_model.log() # This will visualize the backbone after modification and other information.
+import torch
+x = torch.randint(0, 10, (16, 128)).cuda()
+import time
+model = model.cuda()
+st_time = time.time()
+for t in range(10):
+    y = model(x)
+print(time.time() - st_time)
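
A caveat on the timing loop above (editor's note): CUDA kernel launches are asynchronous, so the plain wall-clock measurement can misattribute GPU time. A tighter sketch of the same measurement, using only standard torch calls:

torch.cuda.synchronize()  # finish any queued GPU work before starting the clock
st_time = time.time()
with torch.no_grad():  # inference-only timing; skip autograd bookkeeping
    for t in range(10):
        y = model(x)
torch.cuda.synchronize()  # wait for the ten forward passes to actually complete
print(time.time() - st_time)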