commit
3be2d66df4
|
@ -19,6 +19,12 @@ log.txt
|
||||||
**/DeltaHub/
|
**/DeltaHub/
|
||||||
*beans/
|
*beans/
|
||||||
**/examples/*/configs/
|
**/examples/*/configs/
|
||||||
|
|
||||||
|
!examples/*/configs/config_gen.py
|
||||||
|
**/jupyter_notebook_examples/
|
||||||
|
!examples/jupyter_notebook_examples/*.py
|
||||||
|
|
||||||
|
|
||||||
!**/examples/*/configs/config_gen.py
|
!**/examples/*/configs/config_gen.py
|
||||||
**/outputs_search/**/*.bin
|
**/outputs_search/**/*.bin
|
||||||
**/outputs_search/**/*.pt
|
**/outputs_search/**/*.pt
|
||||||
|
@ -28,4 +34,5 @@ log.txt
|
||||||
**/examples/examples_bmtrain/BMTrain_stable
|
**/examples/examples_bmtrain/BMTrain_stable
|
||||||
**/examples/examples_bmtrain/BMPretrain
|
**/examples/examples_bmtrain/BMPretrain
|
||||||
**/examples/examples_bmtrain/BigModels/BigModels/results
|
**/examples/examples_bmtrain/BigModels/BigModels/results
|
||||||
**/Delta_Memory/
|
**/Delta_Memory/
|
||||||
|
|
||||||
|
|
|
@ -32,6 +32,8 @@ OpenDelta is a toolkit for parameter efficient methods (we dub it as *delta tuni
|
||||||
![How PLM changes using Delta-tuning](docs/source/imgs/demo.gif)
|
![How PLM changes using Delta-tuning](docs/source/imgs/demo.gif)
|
||||||
|
|
||||||
## Updates
|
## Updates
|
||||||
|
- 2022.03.20 add a [colab example](https://colab.research.google.com/drive/1hM_zu11s6plpK-YQSkz3CrowJyxtHneY?usp=sharing) to illustrate efficient training and space-saving multitask-serving.
|
||||||
|
- 2022.03.20 a new pip version released.
|
||||||
- 2022.02.16 support [regular expression](https://opendelta.readthedocs.io/en/latest/notes/namebasedaddr.html#regexexpr) in named-based addressing.
|
- 2022.02.16 support [regular expression](https://opendelta.readthedocs.io/en/latest/notes/namebasedaddr.html#regexexpr) in named-based addressing.
|
||||||
|
|
||||||
## Installation
|
## Installation
|
||||||
|
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
|
@ -48,6 +48,7 @@ BaseConfigs['t5-base'] = {
|
||||||
"save_strategy": "steps"
|
"save_strategy": "steps"
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
BaseConfigs['t5-large'] = {
|
BaseConfigs['t5-large'] = {
|
||||||
("job_name", "task_name", "eval_dataset_name", "test_dataset_name", "num_train_epochs",
|
("job_name", "task_name", "eval_dataset_name", "test_dataset_name", "num_train_epochs",
|
||||||
"max_source_length",
|
"max_source_length",
|
||||||
|
@ -107,6 +108,7 @@ BaseConfigs['t5-3b'] = {
|
||||||
# [ 32, 32, 32, 32, 32, 16, 32] + [32] * 8,
|
# [ 32, 32, 32, 32, 32, 16, 32] + [32] * 8,
|
||||||
[ 32, 32, 32, 32, 32, 16, 32] + [8] * 8,
|
[ 32, 32, 32, 32, 32, 16, 32] + [8] * 8,
|
||||||
[ 32, 32, 32, 32, 32, 16, 32] + [8] * 8,
|
[ 32, 32, 32, 32, 32, 16, 32] + [8] * 8,
|
||||||
|
|
||||||
[0] *7 +[0] *8,
|
[0] *7 +[0] *8,
|
||||||
[200, 100, 50, 100, 200, 200, 100, 200, 100, 200, 200, 100, 200, 200, 100],
|
[200, 100, 50, 100, 200, 200, 100, 200, 100, 200, 200, 100, 200, 200, 100],
|
||||||
[200, 100, 50, 100, 200, 200, 100, 200, 100, 200, 200, 100, 200, 200, 100],
|
[200, 100, 50, 100, 200, 200, 100, 200, 100, 200, 200, 100, 200, 200, 100],
|
||||||
|
@ -114,7 +116,6 @@ BaseConfigs['t5-3b'] = {
|
||||||
"do_train": True,
|
"do_train": True,
|
||||||
"do_eval": True,
|
"do_eval": True,
|
||||||
"do_test": True,
|
"do_test": True,
|
||||||
|
|
||||||
"model_name_or_path": "/home/hushengding/plm_cache/t5-3b",
|
"model_name_or_path": "/home/hushengding/plm_cache/t5-3b",
|
||||||
"tokenizer_name": "/home/hushengding/plm_cache/t5-3b",
|
"tokenizer_name": "/home/hushengding/plm_cache/t5-3b",
|
||||||
"save_total_limit": 1,
|
"save_total_limit": 1,
|
||||||
|
@ -255,6 +256,21 @@ AllConfigs['low_rank_adapter_t5-base'].update({
|
||||||
"low_rank_rank": 1,
|
"low_rank_rank": 1,
|
||||||
})
|
})
|
||||||
|
|
||||||
|
AllConfigs['low_rank_adapter_t5-xxl'] = copy.deepcopy(BaseConfigs['t5-xxl'])
|
||||||
|
AllConfigs['low_rank_adapter_t5-xxl'].update({
|
||||||
|
"delta_type": "low_rank_adapter",
|
||||||
|
"learning_rate": 3e-4,
|
||||||
|
"unfrozen_modules": [
|
||||||
|
"deltas",
|
||||||
|
"layer_norm",
|
||||||
|
"final_layer_norm"
|
||||||
|
],
|
||||||
|
"output_dir": "outputs/low_rank_adapter/t5-xxl/",
|
||||||
|
"non_linearity": "gelu_new",
|
||||||
|
"low_rank_w_init": "glorot-uniform",
|
||||||
|
"low_rank_rank": 1,
|
||||||
|
})
|
||||||
|
|
||||||
|
|
||||||
AllConfigs['soft_prompt_t5-base'] = copy.deepcopy(BaseConfigs['t5-base'])
|
AllConfigs['soft_prompt_t5-base'] = copy.deepcopy(BaseConfigs['t5-base'])
|
||||||
AllConfigs['soft_prompt_t5-base'].update({
|
AllConfigs['soft_prompt_t5-base'].update({
|
||||||
|
|
|
@ -632,7 +632,7 @@ class DeltaBase(nn.Module, SaveLoadMixin):
|
||||||
self.config = config
|
self.config = config
|
||||||
|
|
||||||
|
|
||||||
def log(self, module=None, delta_ratio=True, trainable_ratio=True, visualization=True):
|
def log(self, module=None, delta_ratio=True, trainable_ratio=True, visualization=True, cuda_memory=True):
|
||||||
r"""Log and visualize the result of applying delta.
|
r"""Log and visualize the result of applying delta.
|
||||||
Possible Options are ``trainable_ratio``,
|
Possible Options are ``trainable_ratio``,
|
||||||
``visualization``, ``delta_ratio``.
|
``visualization``, ``delta_ratio``.
|
||||||
|
@ -658,6 +658,15 @@ class DeltaBase(nn.Module, SaveLoadMixin):
|
||||||
n_delta = self.num_delta_parameters(module)
|
n_delta = self.num_delta_parameters(module)
|
||||||
n_total = self.num_total_parameters(module)
|
n_total = self.num_total_parameters(module)
|
||||||
logger.info("Delta Parameter Ratio: {:2f}%".format(n_delta/n_total*100))
|
logger.info("Delta Parameter Ratio: {:2f}%".format(n_delta/n_total*100))
|
||||||
|
if cuda_memory:
|
||||||
|
cudamem = 0
|
||||||
|
maxcudamem = 0
|
||||||
|
for device_id in range(torch.cuda.device_count()):
|
||||||
|
cudamem += torch.cuda.memory_allocated(f"cuda:{device_id}")/1024**3
|
||||||
|
maxcudamem += torch.cuda.max_memory_allocated(f"cuda:{device_id}")/1024**3
|
||||||
|
logger.info("Static Memory {:.2f} GB, Max Memory {:.2f} GB".format(cudamem, maxcudamem))
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def num_delta_parameters(self, module: Optional[nn.Module]=None):
|
def num_delta_parameters(self, module: Optional[nn.Module]=None):
|
||||||
r"""[NODOC] A small sugar function to get the number of trainable parameter in the backbone model. Often used to
|
r"""[NODOC] A small sugar function to get the number of trainable parameter in the backbone model. Often used to
|
||||||
|
@ -678,7 +687,7 @@ class DeltaBase(nn.Module, SaveLoadMixin):
|
||||||
return pnum_tot
|
return pnum_tot
|
||||||
|
|
||||||
# Two functions for plug and remove the delta model.
|
# Two functions for plug and remove the delta model.
|
||||||
def attach(self, module: Optional[nn.Module]=None,):
|
def attach(self, module: Optional[nn.Module]=None, reset_state_dict=True):
|
||||||
r"""Reattach the delta modules to the backbone. Note that this method can not be used to create new delta modules.
|
r"""Reattach the delta modules to the backbone. Note that this method can not be used to create new delta modules.
|
||||||
Instead, a :meth:`DeltaBase.detach` should precede this method.
|
Instead, a :meth:`DeltaBase.detach` should precede this method.
|
||||||
|
|
||||||
|
@ -707,10 +716,13 @@ class DeltaBase(nn.Module, SaveLoadMixin):
|
||||||
else:
|
else:
|
||||||
raise NotImplementedError
|
raise NotImplementedError
|
||||||
|
|
||||||
_delta_info['state'] = "on"
|
_delta_info['state'] = "on"
|
||||||
|
if reset_state_dict:
|
||||||
|
self.set_active_state_dict(module)
|
||||||
|
|
||||||
|
|
||||||
def detach(self, module: Optional[nn.Module]=None,):
|
|
||||||
|
def detach(self, module: Optional[nn.Module]=None, reset_state_dict=True):
|
||||||
r"""Detach the delta module from the backbone. The delta module is not deleted, but temporarily turned off.
|
r"""Detach the delta module from the backbone. The delta module is not deleted, but temporarily turned off.
|
||||||
Use :meth:`DeltaBase.attach` to reattach the delta model to the backbone.
|
Use :meth:`DeltaBase.attach` to reattach the delta model to the backbone.
|
||||||
|
|
||||||
|
@ -743,4 +755,10 @@ class DeltaBase(nn.Module, SaveLoadMixin):
|
||||||
raise NotImplementedError
|
raise NotImplementedError
|
||||||
|
|
||||||
_delta_info['state'] = "off"
|
_delta_info['state'] = "off"
|
||||||
|
if reset_state_dict:
|
||||||
|
try:
|
||||||
|
module.state_dict = module.state_dict.__wrapped__
|
||||||
|
except AttributeError:
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -148,7 +148,7 @@ class LowRankAdapterModel(DeltaBase):
|
||||||
"""
|
"""
|
||||||
|
|
||||||
config_class = LowRankAdapterConfig
|
config_class = LowRankAdapterConfig
|
||||||
delta_type = "lowrankadapter"
|
delta_type = "low_rank_adapter"
|
||||||
default_modified_modules = ['attn', 'ff']
|
default_modified_modules = ['attn', 'ff']
|
||||||
def __init__(self,
|
def __init__(self,
|
||||||
backbone_model: nn.Module,
|
backbone_model: nn.Module,
|
||||||
|
|
|
@ -113,18 +113,6 @@ def get_log_levels_dict():
|
||||||
return log_levels
|
return log_levels
|
||||||
|
|
||||||
|
|
||||||
def get_logger(name: Optional[str] = None) -> logging.Logger:
|
|
||||||
"""
|
|
||||||
Return a logger with the specified name.
|
|
||||||
This function is not supposed to be directly accessed unless you are writing a custom transformers module.
|
|
||||||
"""
|
|
||||||
|
|
||||||
if name is None:
|
|
||||||
name = _get_library_name()
|
|
||||||
|
|
||||||
_configure_library_root_logger()
|
|
||||||
return logging.getLogger(name)
|
|
||||||
|
|
||||||
|
|
||||||
def get_verbosity() -> int:
|
def get_verbosity() -> int:
|
||||||
"""
|
"""
|
||||||
|
@ -275,4 +263,17 @@ def warning_advice(self, *args, **kwargs):
|
||||||
|
|
||||||
logging.Logger.warning_advice = warning_advice
|
logging.Logger.warning_advice = warning_advice
|
||||||
|
|
||||||
set_verbosity_debug()
|
|
||||||
|
def get_logger(name: Optional[str] = None, verbosity='info') -> logging.Logger:
|
||||||
|
"""
|
||||||
|
Return a logger with the specified name.
|
||||||
|
This function is not supposed to be directly accessed unless you are writing a custom transformers module.
|
||||||
|
"""
|
||||||
|
|
||||||
|
if name is None:
|
||||||
|
name = _get_library_name()
|
||||||
|
|
||||||
|
_configure_library_root_logger()
|
||||||
|
logger = logging.getLogger(name)
|
||||||
|
logger.setLevel(log_levels[verbosity])
|
||||||
|
return logger
|
||||||
|
|
|
@ -1,8 +1,8 @@
|
||||||
torch>=1.9.0
|
torch>=1.8.0
|
||||||
transformers==4.10.0
|
transformers>=4.10.0
|
||||||
datasets==1.17.0
|
datasets>=1.17.0
|
||||||
sentencepiece==0.1.96
|
sentencepiece>=0.1.96
|
||||||
tqdm==4.62.2
|
tqdm>=4.62.2
|
||||||
loralib
|
loralib
|
||||||
decorator
|
decorator
|
||||||
rich
|
rich
|
||||||
|
|
4
setup.py
4
setup.py
|
@ -17,7 +17,7 @@ print(requires)
|
||||||
with open('README.md', 'r') as f:
|
with open('README.md', 'r') as f:
|
||||||
setuptools.setup(
|
setuptools.setup(
|
||||||
name = 'opendelta',
|
name = 'opendelta',
|
||||||
version = '0.0.1',
|
version = '0.0.3',
|
||||||
description = "An open source framework for delta learning (parameter efficient learning).",
|
description = "An open source framework for delta learning (parameter efficient learning).",
|
||||||
long_description=open("README.md", "r", encoding="utf-8").read(),
|
long_description=open("README.md", "r", encoding="utf-8").read(),
|
||||||
long_description_content_type="text/markdown",
|
long_description_content_type="text/markdown",
|
||||||
|
@ -26,7 +26,7 @@ with open('README.md', 'r') as f:
|
||||||
license="Apache",
|
license="Apache",
|
||||||
url="https://github.com/thunlp/OpenDelta",
|
url="https://github.com/thunlp/OpenDelta",
|
||||||
keywords = ['PLM', 'Parameter-efficient-Learning', 'AI', 'NLP'],
|
keywords = ['PLM', 'Parameter-efficient-Learning', 'AI', 'NLP'],
|
||||||
python_requires=">=3.8.0",
|
python_requires=">=3.6.0",
|
||||||
install_requires=requires,
|
install_requires=requires,
|
||||||
packages=setuptools.find_packages(),
|
packages=setuptools.find_packages(),
|
||||||
classifiers=[
|
classifiers=[
|
||||||
|
|
Loading…
Reference in New Issue