Merge pull request #1 from thunlp/main

Fetch
This commit is contained in:
DingDing 2022-03-21 21:36:32 +08:00 committed by GitHub
commit 3be2d66df4
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
11 changed files with 71 additions and 27 deletions

7
.gitignore vendored
View File

@ -19,6 +19,12 @@ log.txt
**/DeltaHub/ **/DeltaHub/
*beans/ *beans/
**/examples/*/configs/ **/examples/*/configs/
!examples/*/configs/config_gen.py
**/jupyter_notebook_examples/
!examples/jupyter_notebook_examples/*.py
!**/examples/*/configs/config_gen.py !**/examples/*/configs/config_gen.py
**/outputs_search/**/*.bin **/outputs_search/**/*.bin
**/outputs_search/**/*.pt **/outputs_search/**/*.pt
@ -29,3 +35,4 @@ log.txt
**/examples/examples_bmtrain/BMPretrain **/examples/examples_bmtrain/BMPretrain
**/examples/examples_bmtrain/BigModels/BigModels/results **/examples/examples_bmtrain/BigModels/BigModels/results
**/Delta_Memory/ **/Delta_Memory/

View File

@ -32,6 +32,8 @@ OpenDelta is a toolkit for parameter efficient methods (we dub it as *delta tuni
![How PLM changes using Delta-tuning](docs/source/imgs/demo.gif) ![How PLM changes using Delta-tuning](docs/source/imgs/demo.gif)
## Updates ## Updates
- 2022.03.20 add a [colab example](https://colab.research.google.com/drive/1hM_zu11s6plpK-YQSkz3CrowJyxtHneY?usp=sharing) to illustrate efficient training and space-saving multitask-serving.
- 2022.03.20 release a new pip version.
- 2022.02.16 support [regular expression](https://opendelta.readthedocs.io/en/latest/notes/namebasedaddr.html#regexexpr) in named-based addressing. - 2022.02.16 support [regular expression](https://opendelta.readthedocs.io/en/latest/notes/namebasedaddr.html#regexexpr) in named-based addressing.
## Installation ## Installation

Binary file not shown.

BIN
dist/opendelta-0.0.3.tar.gz vendored Normal file

Binary file not shown.

View File

@ -48,6 +48,7 @@ BaseConfigs['t5-base'] = {
"save_strategy": "steps" "save_strategy": "steps"
} }
BaseConfigs['t5-large'] = { BaseConfigs['t5-large'] = {
("job_name", "task_name", "eval_dataset_name", "test_dataset_name", "num_train_epochs", ("job_name", "task_name", "eval_dataset_name", "test_dataset_name", "num_train_epochs",
"max_source_length", "max_source_length",
@ -107,6 +108,7 @@ BaseConfigs['t5-3b'] = {
# [ 32, 32, 32, 32, 32, 16, 32] + [32] * 8, # [ 32, 32, 32, 32, 32, 16, 32] + [32] * 8,
[ 32, 32, 32, 32, 32, 16, 32] + [8] * 8, [ 32, 32, 32, 32, 32, 16, 32] + [8] * 8,
[ 32, 32, 32, 32, 32, 16, 32] + [8] * 8, [ 32, 32, 32, 32, 32, 16, 32] + [8] * 8,
[0] *7 +[0] *8, [0] *7 +[0] *8,
[200, 100, 50, 100, 200, 200, 100, 200, 100, 200, 200, 100, 200, 200, 100], [200, 100, 50, 100, 200, 200, 100, 200, 100, 200, 200, 100, 200, 200, 100],
[200, 100, 50, 100, 200, 200, 100, 200, 100, 200, 200, 100, 200, 200, 100], [200, 100, 50, 100, 200, 200, 100, 200, 100, 200, 200, 100, 200, 200, 100],
@ -114,7 +116,6 @@ BaseConfigs['t5-3b'] = {
"do_train": True, "do_train": True,
"do_eval": True, "do_eval": True,
"do_test": True, "do_test": True,
"model_name_or_path": "/home/hushengding/plm_cache/t5-3b", "model_name_or_path": "/home/hushengding/plm_cache/t5-3b",
"tokenizer_name": "/home/hushengding/plm_cache/t5-3b", "tokenizer_name": "/home/hushengding/plm_cache/t5-3b",
"save_total_limit": 1, "save_total_limit": 1,
@ -255,6 +256,21 @@ AllConfigs['low_rank_adapter_t5-base'].update({
"low_rank_rank": 1, "low_rank_rank": 1,
}) })
# Low-rank adapter on T5-XXL: take an independent deep copy of the T5-XXL base
# config, then layer the delta-specific settings on top of that copy.
AllConfigs['low_rank_adapter_t5-xxl'] = copy.deepcopy(BaseConfigs['t5-xxl'])
AllConfigs['low_rank_adapter_t5-xxl'].update(
    delta_type="low_rank_adapter",
    learning_rate=3e-4,
    unfrozen_modules=["deltas", "layer_norm", "final_layer_norm"],
    output_dir="outputs/low_rank_adapter/t5-xxl/",
    non_linearity="gelu_new",
    low_rank_w_init="glorot-uniform",
    low_rank_rank=1,
)
AllConfigs['soft_prompt_t5-base'] = copy.deepcopy(BaseConfigs['t5-base']) AllConfigs['soft_prompt_t5-base'] = copy.deepcopy(BaseConfigs['t5-base'])
AllConfigs['soft_prompt_t5-base'].update({ AllConfigs['soft_prompt_t5-base'].update({

View File

@ -632,7 +632,7 @@ class DeltaBase(nn.Module, SaveLoadMixin):
self.config = config self.config = config
def log(self, module=None, delta_ratio=True, trainable_ratio=True, visualization=True): def log(self, module=None, delta_ratio=True, trainable_ratio=True, visualization=True, cuda_memory=True):
r"""Log and visualize the result of applying delta. r"""Log and visualize the result of applying delta.
Possible Options are ``trainable_ratio``, Possible Options are ``trainable_ratio``,
``visualization``, ``delta_ratio``. ``visualization``, ``delta_ratio``.
@ -658,6 +658,15 @@ class DeltaBase(nn.Module, SaveLoadMixin):
n_delta = self.num_delta_parameters(module) n_delta = self.num_delta_parameters(module)
n_total = self.num_total_parameters(module) n_total = self.num_total_parameters(module)
logger.info("Delta Parameter Ratio: {:2f}%".format(n_delta/n_total*100)) logger.info("Delta Parameter Ratio: {:2f}%".format(n_delta/n_total*100))
if cuda_memory:
cudamem = 0
maxcudamem = 0
for device_id in range(torch.cuda.device_count()):
cudamem += torch.cuda.memory_allocated(f"cuda:{device_id}")/1024**3
maxcudamem += torch.cuda.max_memory_allocated(f"cuda:{device_id}")/1024**3
logger.info("Static Memory {:.2f} GB, Max Memory {:.2f} GB".format(cudamem, maxcudamem))
def num_delta_parameters(self, module: Optional[nn.Module]=None): def num_delta_parameters(self, module: Optional[nn.Module]=None):
r"""[NODOC] A small sugar function to get the number of trainable parameter in the backbone model. Often used to r"""[NODOC] A small sugar function to get the number of trainable parameter in the backbone model. Often used to
@ -678,7 +687,7 @@ class DeltaBase(nn.Module, SaveLoadMixin):
return pnum_tot return pnum_tot
# Two functions for plug and remove the delta model. # Two functions for plug and remove the delta model.
def attach(self, module: Optional[nn.Module]=None,): def attach(self, module: Optional[nn.Module]=None, reset_state_dict=True):
r"""Reattach the delta modules to the backbone. Note that this method can not be used to create new delta modules. r"""Reattach the delta modules to the backbone. Note that this method can not be used to create new delta modules.
Instead, a :meth:`DeltaBase.detach` should precede this method. Instead, a :meth:`DeltaBase.detach` should precede this method.
@ -708,9 +717,12 @@ class DeltaBase(nn.Module, SaveLoadMixin):
raise NotImplementedError raise NotImplementedError
_delta_info['state'] = "on" _delta_info['state'] = "on"
if reset_state_dict:
self.set_active_state_dict(module)
def detach(self, module: Optional[nn.Module]=None,):
def detach(self, module: Optional[nn.Module]=None, reset_state_dict=True):
r"""Detach the delta module from the backbone. The delta module is not deleted, but temporarily turned off. r"""Detach the delta module from the backbone. The delta module is not deleted, but temporarily turned off.
Use :meth:`DeltaBase.attach` to reattach the delta model to the backbone. Use :meth:`DeltaBase.attach` to reattach the delta model to the backbone.
@ -743,4 +755,10 @@ class DeltaBase(nn.Module, SaveLoadMixin):
raise NotImplementedError raise NotImplementedError
_delta_info['state'] = "off" _delta_info['state'] = "off"
if reset_state_dict:
try:
module.state_dict = module.state_dict.__wrapped__
except AttributeError:
pass

View File

@ -148,7 +148,7 @@ class LowRankAdapterModel(DeltaBase):
""" """
config_class = LowRankAdapterConfig config_class = LowRankAdapterConfig
delta_type = "lowrankadapter" delta_type = "low_rank_adapter"
default_modified_modules = ['attn', 'ff'] default_modified_modules = ['attn', 'ff']
def __init__(self, def __init__(self,
backbone_model: nn.Module, backbone_model: nn.Module,

View File

@ -113,18 +113,6 @@ def get_log_levels_dict():
return log_levels return log_levels
def get_logger(name: Optional[str] = None) -> logging.Logger:
    """
    Look up and return the logger registered under ``name``.

    When ``name`` is ``None`` the value returned by ``_get_library_name()`` is used instead.
    ``_configure_library_root_logger()`` is invoked before the logger is fetched.

    This function is not supposed to be directly accessed unless you are writing a custom transformers module.
    """
    logger_name = name if name is not None else _get_library_name()
    _configure_library_root_logger()
    return logging.getLogger(logger_name)
def get_verbosity() -> int: def get_verbosity() -> int:
""" """
@ -275,4 +263,17 @@ def warning_advice(self, *args, **kwargs):
logging.Logger.warning_advice = warning_advice logging.Logger.warning_advice = warning_advice
set_verbosity_debug()
def get_logger(name: Optional[str] = None, verbosity='info') -> logging.Logger:
    """
    Return a logger with the specified name, set to the requested verbosity.

    This function is not supposed to be directly accessed unless you are writing a custom transformers module.

    Args:
        name: Name of the logger. When ``None``, the value returned by
            ``_get_library_name()`` is used.
        verbosity: Either a key of ``log_levels`` (an exact match is tried first, then a
            lower-cased match for strings) or an integer ``logging`` level such as
            ``logging.DEBUG``.

    Returns:
        The ``logging.Logger`` obtained from ``logging.getLogger(name)``, with its level
        set according to ``verbosity``.

    Raises:
        KeyError: If ``verbosity`` is not an int and does not name a key of ``log_levels``.
    """
    if name is None:
        name = _get_library_name()

    _configure_library_root_logger()
    logger = logging.getLogger(name)

    if isinstance(verbosity, int):
        # Numeric levels are handed to ``setLevel`` unchanged.
        level = verbosity
    else:
        key = verbosity
        # Fall back to a case-insensitive match so "INFO" and "info" behave alike.
        if key not in log_levels and isinstance(key, str):
            key = key.lower()
        if key not in log_levels:
            raise KeyError(
                f"Unknown verbosity {verbosity!r}; expected one of {list(log_levels)} "
                "or an integer logging level"
            )
        level = log_levels[key]

    logger.setLevel(level)
    return logger

View File

@ -1,8 +1,8 @@
torch>=1.9.0 torch>=1.8.0
transformers==4.10.0 transformers>=4.10.0
datasets==1.17.0 datasets>=1.17.0
sentencepiece==0.1.96 sentencepiece>=0.1.96
tqdm==4.62.2 tqdm>=4.62.2
loralib loralib
decorator decorator
rich rich

View File

@ -17,7 +17,7 @@ print(requires)
with open('README.md', 'r') as f: with open('README.md', 'r') as f:
setuptools.setup( setuptools.setup(
name = 'opendelta', name = 'opendelta',
version = '0.0.1', version = '0.0.3',
description = "An open source framework for delta learning (parameter efficient learning).", description = "An open source framework for delta learning (parameter efficient learning).",
long_description=open("README.md", "r", encoding="utf-8").read(), long_description=open("README.md", "r", encoding="utf-8").read(),
long_description_content_type="text/markdown", long_description_content_type="text/markdown",
@ -26,7 +26,7 @@ with open('README.md', 'r') as f:
license="Apache", license="Apache",
url="https://github.com/thunlp/OpenDelta", url="https://github.com/thunlp/OpenDelta",
keywords = ['PLM', 'Parameter-efficient-Learning', 'AI', 'NLP'], keywords = ['PLM', 'Parameter-efficient-Learning', 'AI', 'NLP'],
python_requires=">=3.8.0", python_requires=">=3.6.0",
install_requires=requires, install_requires=requires,
packages=setuptools.find_packages(), packages=setuptools.find_packages(),
classifiers=[ classifiers=[