commit 703bc932ea

@@ -92,7 +92,7 @@ class DeltaBase(nn.Module, SaveLoadMixin):
                  modified_modules: Optional[List[str]] = None,
                  unfrozen_modules: Optional[List[str]] = None,
                  interactive_modify: Optional[Union[bool, int]] = False,
-                 common_structure = False,
+                 common_structure: Optional[bool] = False,
                  ):
         nn.Module.__init__(self)
         # register the backbone model after init using self.__dict__ method to avoid adding backbone_model
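
The comment in the hunk above refers to a standard PyTorch pattern: writing the backbone into `self.__dict__` bypasses `nn.Module.__setattr__`, so the backbone is never registered as a submodule and its parameters stay out of the delta module's `state_dict()`. A minimal, self-contained sketch of that pattern (the class and attribute names are illustrative, not taken from this commit):

import torch.nn as nn

class DeltaSketch(nn.Module):
    """Illustrative only: hold a backbone reference without registering it."""
    def __init__(self, backbone: nn.Module):
        super().__init__()
        # Direct __dict__ assignment skips nn.Module.__setattr__, so the
        # backbone does not show up in _modules or state_dict().
        self.__dict__["backbone_model"] = backbone
        # A normal assignment, by contrast, registers the layer as a submodule.
        self.delta_layer = nn.Linear(4, 4)

backbone = nn.Linear(4, 4)
delta = DeltaSketch(backbone)
print(list(delta.state_dict()))          # only delta_layer.* keys
print(delta.backbone_model is backbone)  # True: the reference is still usable
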
@@ -181,7 +181,7 @@ class AdapterModel(DeltaBase):
                            the implemented ones)
         unfrozen_modules (:obj:`List[str]`, *optional*, default to :obj:`None`): The modules that should be unfrozen
                            together with the prefix parameters.
-        common_structure (:obj:`bool`): whether using name-based addressing witha common structure mapping.
+        common_structure (:obj:`bool`): whether using name-based addressing with a common structure mapping.

     """
     config_class = AdapterConfig

@@ -192,8 +192,8 @@ class AdapterModel(DeltaBase):
                  bottleneck_dim: Optional[int]=24,
                  non_linearity: Optional[str]='gelu_new',
                  sequential: Optional[str] = True,
-                 modified_modules: Optional[bool] = None,
-                 unfrozen_modules: Optional[bool] = None,
+                 modified_modules: Optional[List[str]] = None,
+                 unfrozen_modules: Optional[List[str]] = None,
                  common_structure: Optional[bool] = None,
                  interactive_modify: Optional[Union[bool, int]] = False,
                  ):
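
With the corrected annotations, `modified_modules` and `unfrozen_modules` are plain lists of module-name patterns. A hedged usage sketch built from the keyword arguments visible in this hunk; the checkpoint name and module patterns are illustrative, and `freeze_module` is the library's usual follow-up call, not part of this diff:

from transformers import AutoModelForSeq2SeqLM
from opendelta import AdapterModel

backbone = AutoModelForSeq2SeqLM.from_pretrained("t5-base")  # any HF backbone

# Insert bottleneck adapters; the list-typed arguments name the target modules.
delta_model = AdapterModel(
    backbone_model=backbone,
    bottleneck_dim=24,
    non_linearity="gelu_new",
    modified_modules=["attn", "ff"],   # illustrative name patterns
    unfrozen_modules=["deltas"],       # keep only the new parameters trainable
)
delta_model.freeze_module(exclude=["deltas"])  # assumed helper, see OpenDelta docs
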
@@ -102,7 +102,7 @@ class BitFitModel(DeltaBase):
                            the implemented ones)
         unfrozen_modules (:obj:`List[str]`, *optional*, default to :obj:`None`): The modules that should be unfrozen
                            together with the prefix parameters.
-        common_structure (:obj:`bool`): whether using name-based addressing witha common structure mapping.
+        common_structure (:obj:`bool`): whether using name-based addressing with a common structure mapping.

     """

@@ -112,8 +112,8 @@ class BitFitModel(DeltaBase):
     default_modified_modules = ["attn", "ff", "layer_norm","lm_head.proj"] # modify all the bias parameter in attention and feed-forward layer.
     def __init__(self,
                  backbone_model: nn.Module,
-                 modified_modules: Optional[bool] = None,
-                 unfrozen_modules: Optional[bool] = None,
+                 modified_modules: Optional[List[str]] = None,
+                 unfrozen_modules: Optional[List[str]] = None,
                  common_structure: Optional[bool] = None,
                  interactive_modify: Optional[Union[bool, int]] = False,
                  ):
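
When `modified_modules` is left at `None`, the class-level `default_modified_modules` shown above is what ends up targeted, so BitFit trains the bias terms under the attention, feed-forward, layer-norm and LM-head-projection modules. A hedged sketch of that call; the checkpoint and the extra unfrozen module are illustrative, and the None-falls-back-to-default behaviour is inferred from the class attribute rather than spelled out in this hunk:

from transformers import AutoModelForSequenceClassification
from opendelta import BitFitModel

backbone = AutoModelForSequenceClassification.from_pretrained("bert-base-uncased")

# With modified_modules=None the class default is used, i.e. the bias terms in
# ["attn", "ff", "layer_norm", "lm_head.proj"] become the trainable deltas.
delta_model = BitFitModel(
    backbone_model=backbone,
    modified_modules=None,                      # fall back to default_modified_modules
    unfrozen_modules=["deltas", "classifier"],  # illustrative: also train the task head
)
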
@@ -188,7 +188,7 @@ class CompacterModel(DeltaBase):
                            the implemented ones)
         unfrozen_modules (:obj:`List[str]`, *optional*, default to :obj:`None`): The modules that should be unfrozen
                            together with the prefix parameters.
-        common_structure (:obj:`bool`, *optional*, default to :obj:`None`): whether using name-based addressing witha common structure mapping.
+        common_structure (:obj:`bool`, *optional*, default to :obj:`None`): whether using name-based addressing with a common structure mapping.
         reduction_factor (:obj:`int`, *optional*, default to ``16``): bottleneck_dim = hidden_dim//reduction_factor
         non_linearity (:obj:`str`, *optional*, default to ``"gelu_new"``): The non linearity activation used in between the down
                            projecter and the up projecter.

@@ -216,8 +216,8 @@ class CompacterModel(DeltaBase):
     default_modified_modules = ["attn", "ff"]
     def __init__(self,
                  backbone_model,
-                 modified_modules: Optional[bool] = None,
-                 unfrozen_modules: Optional[bool] = None,
+                 modified_modules: Optional[List[str]] = None,
+                 unfrozen_modules: Optional[List[str]] = None,
                  common_structure: Optional[bool] = None,
                  interactive_modify: Optional[Union[bool, int]] = False,
                  reduction_factor=16,
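
The Compacter docstring above defines the bottleneck width implicitly as bottleneck_dim = hidden_dim // reduction_factor. A small worked sketch of that relation together with the constructor call; the checkpoint name and module patterns are illustrative:

from transformers import AutoModelForSeq2SeqLM
from opendelta import CompacterModel

backbone = AutoModelForSeq2SeqLM.from_pretrained("t5-base")

hidden_dim = backbone.config.d_model             # 768 for t5-base
reduction_factor = 16
bottleneck_dim = hidden_dim // reduction_factor
print(bottleneck_dim)                            # 48: width of the compacter bottleneck

delta_model = CompacterModel(
    backbone_model=backbone,
    modified_modules=["attn", "ff"],             # matches default_modified_modules
    reduction_factor=reduction_factor,
)
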
@@ -88,13 +88,13 @@ class LoraModel(DeltaBase):
     Args:
         backbone_model (:obj:`transformers.PretrainedModels`): The backbone model to be modified.
         lora_r (:obj:`int`, *optional*): the rank of the lora parameters. The smaller lora_r is , the fewer parameters lora has.
-        lora_alpha (:obj:`bool`, *optional*): A hyper-parameter to control the init scale of loralib.linear .
-        lora_dropout (:obj:`bool`, *optional*): The dropout rate in lora.linear.
+        lora_alpha (:obj:`int`, *optional*): A hyper-parameter to control the init scale of loralib.linear .
+        lora_dropout (:obj:`float`, *optional*): The dropout rate in lora.linear.
         modified_modules (:obj:`List[str]`): For prefix tuning, the it must refer to an attention layer (Currently, only
                            the implemented ones)
         unfrozen_modules (:obj:`List[str]`, *optional*, default to :obj:`None`): The modules that should be unfrozen
                            together with the prefix parameters.
-        common_structure (:obj:`bool`): whether using name-based addressing witha common structure mapping.
+        common_structure (:obj:`bool`): whether using name-based addressing with a common structure mapping.

     """

@@ -46,13 +46,13 @@ class LoraModel(DeltaBase):
     Args:
         backbone_model (:obj:`transformers.PretrainedModels`): The backbone model to be modified.
         lora_r (:obj:`int`, *optional*): the rank of the lora parameters. The smaller lora_r is , the fewer parameters lora has.
-        lora_alpha (:obj:`bool`, *optional*): A hyper-parameter to control the init scale of loralib.linear .
-        lora_dropout (:obj:`bool`, *optional*): The dropout rate in lora.linear.
+        lora_alpha (:obj:`int`, *optional*): A hyper-parameter to control the init scale of loralib.linear .
+        lora_dropout (:obj:`float`, *optional*): The dropout rate in lora.linear.
         modified_modules (:obj:`List[str]`): For prefix tuning, the it must refer to an attention layer (Currently, only
                            the implemented ones)
         unfrozen_modules (:obj:`List[str]`, *optional*, default to :obj:`None`): The modules that should be unfrozen
                            together with the prefix parameters.
-        common_structure (:obj:`bool`): whether using name-based addressing witha common structure mapping.
+        common_structure (:obj:`bool`): whether using name-based addressing with a common structure mapping.

     """

@@ -64,8 +64,8 @@ class LoraModel(DeltaBase):
                  lora_r=8,
                  lora_alpha=16,
                  lora_dropout=0.0,
-                 modified_modules: Optional[bool] = None,
-                 unfrozen_modules: Optional[bool] = None,
+                 modified_modules: Optional[List[str]] = None,
+                 unfrozen_modules: Optional[List[str]] = None,
                  common_structure: Optional[bool] = None,
                  interactive_modify: Optional[Union[bool, int]] = False,
                  ):
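
The corrected docstring types match how these three knobs are used in loralib: `lora_r` is the integer rank, `lora_alpha` an integer scale (the effective scaling is lora_alpha / lora_r), and `lora_dropout` a float probability. A hedged constructor sketch using the defaults visible in this hunk; the checkpoint and module patterns are illustrative:

from transformers import AutoModelForSeq2SeqLM
from opendelta import LoraModel

backbone = AutoModelForSeq2SeqLM.from_pretrained("t5-base")

delta_model = LoraModel(
    backbone_model=backbone,
    lora_r=8,          # rank of the low-rank update; smaller -> fewer parameters
    lora_alpha=16,     # init/scale hyper-parameter; effective scale is alpha / r
    lora_dropout=0.0,  # dropout applied inside the LoRA branch
    modified_modules=["attn.q", "attn.v"],  # illustrative: query/value projections
    unfrozen_modules=["deltas"],
)
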
@@ -143,7 +143,7 @@ class LowRankAdapterModel(DeltaBase):
                            the implemented ones)
         unfrozen_modules (:obj:`List[str]`, *optional*, default to :obj:`None`): The modules that should be unfrozen
                            together with the prefix parameters.
-        common_structure (:obj:`bool`, *optional*, default to :obj:`None`): whether using name-based addressing witha common structure mapping.
+        common_structure (:obj:`bool`, *optional*, default to :obj:`None`): whether using name-based addressing with a common structure mapping.

     """

@@ -156,8 +156,8 @@ class LowRankAdapterModel(DeltaBase):
                  non_linearity = "gelu_new",
                  low_rank_w_init = "glorot-uniform",
                  low_rank_rank = 1,
-                 modified_modules: Optional[bool] = None,
-                 unfrozen_modules: Optional[bool] = None,
+                 modified_modules: Optional[List[str]] = None,
+                 unfrozen_modules: Optional[List[str]] = None,
                  common_structure: Optional[bool] = None,
                  interactive_modify: Optional[Union[bool, int]] = False,
                  ):
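
The same annotation fix, applied to the low-rank adapter variant, whose own knobs are the factor initialization and the rank. A hedged constructor sketch with the defaults visible in this hunk; the checkpoint and module patterns are illustrative:

from transformers import AutoModelForSeq2SeqLM
from opendelta import LowRankAdapterModel

backbone = AutoModelForSeq2SeqLM.from_pretrained("t5-base")

delta_model = LowRankAdapterModel(
    backbone_model=backbone,
    non_linearity="gelu_new",
    low_rank_w_init="glorot-uniform",  # init scheme for the low-rank factors
    low_rank_rank=1,                   # rank of the factorized adapter weights
    modified_modules=["attn", "ff"],   # illustrative name patterns
)
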
@@ -453,7 +453,7 @@ class PrefixModel(DeltaBase):
                            the implemented ones)
         unfrozen_modules (:obj:`List[str]`, *optional*, default to :obj:`None`): The modules that should be unfrozen
                            together with the prefix parameters.
-        common_structure (:obj:`bool`): whether using name-based addressing witha common structure mapping.
+        common_structure (:obj:`bool`): whether using name-based addressing with a common structure mapping.

     """
     config_class = PrefixConfig

@@ -134,13 +134,13 @@ class SoftPromptModel(DeltaBase):
     Args:
         backbone_model (:obj:`transformers.PretrainedModels`): The backbone model to be modified.
         soft_token_num (:obj:`int`, *optional*): num of new tokens to add in the front of the input.
-        init_range (:obj:`bool`, *optional*): If initialize new tokens randomly, the random range of uniform distribution.
+        init_range (:obj:`float`, *optional*): If initialize new tokens randomly, the random range of uniform distribution.
         token_init (:obj:`bool`, *optional*, default to :obj:`True`): Whether to initialize the new tokens with tokens of the plm
         modified_modules (:obj:`List[str]`): For prefix tuning, the it must refer to an attention layer (Currently, only
                            the implemented ones)
         unfrozen_modules (:obj:`List[str]`, *optional*, default to :obj:`None`): The modules that should be unfrozen
                            together with the prefix parameters.
-        common_structure (:obj:`bool`): whether using name-based addressing witha common structure mapping.
+        common_structure (:obj:`bool`): whether using name-based addressing with a common structure mapping.

     """
     config_class = SoftPromptConfig

@@ -151,8 +151,8 @@ class SoftPromptModel(DeltaBase):
                  soft_token_num=100,
                  init_range = 0.5,
                  token_init=True,
-                 modified_modules: Optional[bool] = None,
-                 unfrozen_modules: Optional[bool] = None,
+                 modified_modules: Optional[List[str]] = None,
+                 unfrozen_modules: Optional[List[str]] = None,
                  common_structure: Optional[bool] = None,
                  interactive_modify: Optional[Union[bool, int]] = False,
                  ):
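
The `init_range` fix (:obj:`float`, not :obj:`bool`) matches the `init_range = 0.5` default above: per the docstring, when the new tokens are not initialized from PLM token embeddings they are drawn uniformly within that range. A hedged constructor sketch; the checkpoint is illustrative and the random-vs-token initialization behaviour is paraphrased from the docstring:

from transformers import AutoModelForSeq2SeqLM
from opendelta import SoftPromptModel

backbone = AutoModelForSeq2SeqLM.from_pretrained("t5-base")

delta_model = SoftPromptModel(
    backbone_model=backbone,
    soft_token_num=100,  # number of soft tokens prepended to the input
    token_init=False,    # do not copy PLM token embeddings...
    init_range=0.5,      # ...instead sample uniformly within this range
)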