Merge pull request #1 from thunlp/main

Fetch
DingDing 2022-03-21 21:36:32 +08:00 committed by GitHub
commit 3be2d66df4
11 changed files with 71 additions and 27 deletions

.gitignore

@@ -19,6 +19,12 @@ log.txt
**/DeltaHub/
*beans/
**/examples/*/configs/
!examples/*/configs/config_gen.py
**/jupyter_notebook_examples/
!examples/jupyter_notebook_examples/*.py
!**/examples/*/configs/config_gen.py
**/outputs_search/**/*.bin
**/outputs_search/**/*.pt
@@ -28,4 +34,5 @@ log.txt
**/examples/examples_bmtrain/BMTrain_stable
**/examples/examples_bmtrain/BMPretrain
**/examples/examples_bmtrain/BigModels/BigModels/results
**/Delta_Memory/

README.md

@@ -32,6 +32,8 @@ OpenDelta is a toolkit for parameter efficient methods (we dub it as *delta tuni
![How PLM changes using Delta-tuning](docs/source/imgs/demo.gif)
## Updates
- 2022.03.20 Add a [colab example](https://colab.research.google.com/drive/1hM_zu11s6plpK-YQSkz3CrowJyxtHneY?usp=sharing) illustrating efficient training and space-saving multi-task serving.
- 2022.03.20 A new pip version has been released.
- 2022.02.16 Support [regular expressions](https://opendelta.readthedocs.io/en/latest/notes/namebasedaddr.html#regexexpr) in name-based addressing.
## Installation

Binary file not shown.

dist/opendelta-0.0.3.tar.gz (new file)

Binary file not shown.

config_gen.py

@@ -48,6 +48,7 @@ BaseConfigs['t5-base'] = {
"save_strategy": "steps"
}
BaseConfigs['t5-large'] = {
("job_name", "task_name", "eval_dataset_name", "test_dataset_name", "num_train_epochs",
"max_source_length",
@@ -107,6 +108,7 @@ BaseConfigs['t5-3b'] = {
# [ 32, 32, 32, 32, 32, 16, 32] + [32] * 8,
[ 32, 32, 32, 32, 32, 16, 32] + [8] * 8,
[ 32, 32, 32, 32, 32, 16, 32] + [8] * 8,
[0] * 7 + [0] * 8,
[200, 100, 50, 100, 200, 200, 100, 200, 100, 200, 200, 100, 200, 200, 100],
[200, 100, 50, 100, 200, 200, 100, 200, 100, 200, 200, 100, 200, 200, 100],
@@ -114,7 +116,6 @@ BaseConfigs['t5-3b'] = {
"do_train": True,
"do_eval": True,
"do_test": True,
"model_name_or_path": "/home/hushengding/plm_cache/t5-3b",
"tokenizer_name": "/home/hushengding/plm_cache/t5-3b",
"save_total_limit": 1,
@@ -255,6 +256,21 @@ AllConfigs['low_rank_adapter_t5-base'].update({
"low_rank_rank": 1,
})
AllConfigs['low_rank_adapter_t5-xxl'] = copy.deepcopy(BaseConfigs['t5-xxl'])
AllConfigs['low_rank_adapter_t5-xxl'].update({
"delta_type": "low_rank_adapter",
"learning_rate": 3e-4,
"unfrozen_modules": [
"deltas",
"layer_norm",
"final_layer_norm"
],
"output_dir": "outputs/low_rank_adapter/t5-xxl/",
"non_linearity": "gelu_new",
"low_rank_w_init": "glorot-uniform",
"low_rank_rank": 1,
})
AllConfigs['soft_prompt_t5-base'] = copy.deepcopy(BaseConfigs['t5-base'])
AllConfigs['soft_prompt_t5-base'].update({
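For orientation: every AllConfigs entry in this generator starts as a deep copy of a per-model base config and is then overlaid with method-specific keys, exactly as the low_rank_adapter_t5-xxl block added above does. A stripped-down sketch of the pattern (values abbreviated; only the keys visible in this diff are taken from the source):

import copy

# Per-backbone defaults shared by every delta method (heavily abbreviated).
BaseConfigs = {"t5-xxl": {"do_train": True, "do_eval": True, "do_test": True}}

AllConfigs = {}
# deepcopy prevents later update() calls from mutating the shared base dict.
AllConfigs["low_rank_adapter_t5-xxl"] = copy.deepcopy(BaseConfigs["t5-xxl"])
AllConfigs["low_rank_adapter_t5-xxl"].update({
    "delta_type": "low_rank_adapter",
    "learning_rate": 3e-4,
    "output_dir": "outputs/low_rank_adapter/t5-xxl/",
    "low_rank_rank": 1,
})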

opendelta/basemodel.py

@@ -632,7 +632,7 @@ class DeltaBase(nn.Module, SaveLoadMixin):
self.config = config
def log(self, module=None, delta_ratio=True, trainable_ratio=True, visualization=True):
def log(self, module=None, delta_ratio=True, trainable_ratio=True, visualization=True, cuda_memory=True):
r"""Log and visualize the result of applying delta.
Possible options are ``trainable_ratio``,
``visualization``, ``delta_ratio``, and ``cuda_memory``.
@@ -658,6 +658,15 @@ class DeltaBase(nn.Module, SaveLoadMixin):
n_delta = self.num_delta_parameters(module)
n_total = self.num_total_parameters(module)
logger.info("Delta Parameter Ratio: {:2f}%".format(n_delta/n_total*100))
if cuda_memory:
cudamem = 0
maxcudamem = 0
for device_id in range(torch.cuda.device_count()):
cudamem += torch.cuda.memory_allocated(f"cuda:{device_id}")/1024**3
maxcudamem += torch.cuda.max_memory_allocated(f"cuda:{device_id}")/1024**3
logger.info("Static Memory {:.2f} GB, Max Memory {:.2f} GB".format(cudamem, maxcudamem))
def num_delta_parameters(self, module: Optional[nn.Module]=None):
r"""[NODOC] A small sugar function to get the number of trainable parameter in the backbone model. Often used to
@@ -678,7 +687,7 @@ class DeltaBase(nn.Module, SaveLoadMixin):
return pnum_tot
# Two functions for plug and remove the delta model.
def attach(self, module: Optional[nn.Module]=None,):
def attach(self, module: Optional[nn.Module]=None, reset_state_dict=True):
r"""Reattach the delta modules to the backbone. Note that this method can not be used to create new delta modules.
Instead, a :meth:`DeltaBase.detach` should precede this method.
@@ -707,10 +716,13 @@ class DeltaBase(nn.Module, SaveLoadMixin):
else:
raise NotImplementedError
_delta_info['state'] = "on"
_delta_info['state'] = "on"
if reset_state_dict:
self.set_active_state_dict(module)
def detach(self, module: Optional[nn.Module]=None,):
def detach(self, module: Optional[nn.Module]=None, reset_state_dict=True):
r"""Detach the delta module from the backbone. The delta module is not deleted, but temporarily turned off.
Use :meth:`DeltaBase.attach` to reattach the delta model to the backbone.
@@ -743,4 +755,10 @@ class DeltaBase(nn.Module, SaveLoadMixin):
raise NotImplementedError
_delta_info['state'] = "off"
if reset_state_dict:
try:
module.state_dict = module.state_dict.__wrapped__
except AttributeError:
pass
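Taken together, the changes to this file add a cuda_memory report to log() and a reset_state_dict switch to attach()/detach() that re-wraps or unwraps the backbone's state_dict. A minimal usage sketch (the LoraModel and bert-base-cased choices are illustrative stand-ins, not part of this diff):

from transformers import AutoModel
from opendelta import LoraModel

backbone = AutoModel.from_pretrained("bert-base-cased")
delta_model = LoraModel(backbone_model=backbone)

# cuda_memory=True makes log() also report allocated and peak CUDA memory,
# summed over all visible devices in GB, as implemented above.
delta_model.log(delta_ratio=True, trainable_ratio=True, cuda_memory=True)

# detach() turns the delta modules off; reset_state_dict=True additionally
# unwraps module.state_dict so saving yields the full backbone again.
delta_model.detach(reset_state_dict=True)

# attach() turns them back on; reset_state_dict=True re-restricts state_dict
# to the active delta parameters via set_active_state_dict().
delta_model.attach(reset_state_dict=True)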

opendelta/delta_models/low_rank_adapter.py

@@ -148,7 +148,7 @@ class LowRankAdapterModel(DeltaBase):
"""
config_class = LowRankAdapterConfig
delta_type = "lowrankadapter"
delta_type = "low_rank_adapter"
default_modified_modules = ['attn', 'ff']
def __init__(self,
backbone_model: nn.Module,
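The corrected delta_type string is what config-driven construction dispatches on, so it now matches the "delta_type": "low_rank_adapter" values emitted by the config generator earlier in this commit. A sketch of that round trip, using OpenDelta's auto classes (the t5-base backbone is illustrative):

from transformers import AutoModelForSeq2SeqLM
from opendelta import AutoDeltaConfig, AutoDeltaModel

backbone = AutoModelForSeq2SeqLM.from_pretrained("t5-base")
# "low_rank_adapter" resolves to LowRankAdapterModel now that the class
# attribute and the config value agree.
delta_config = AutoDeltaConfig.from_dict({"delta_type": "low_rank_adapter"})
delta_model = AutoDeltaModel.from_config(delta_config, backbone_model=backbone)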

opendelta/utils/logging.py

@@ -113,18 +113,6 @@ def get_log_levels_dict():
return log_levels
def get_logger(name: Optional[str] = None) -> logging.Logger:
"""
Return a logger with the specified name.
This function is not supposed to be directly accessed unless you are writing a custom transformers module.
"""
if name is None:
name = _get_library_name()
_configure_library_root_logger()
return logging.getLogger(name)
def get_verbosity() -> int:
"""
@@ -275,4 +263,17 @@ def warning_advice(self, *args, **kwargs):
logging.Logger.warning_advice = warning_advice
set_verbosity_debug()
def get_logger(name: Optional[str] = None, verbosity='info') -> logging.Logger:
"""
Return a logger with the specified name.
This function is not supposed to be directly accessed unless you are writing a custom transformers module.
"""
if name is None:
name = _get_library_name()
_configure_library_root_logger()
logger = logging.getLogger(name)
logger.setLevel(log_levels[verbosity])
return logger
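After the move, get_logger takes a verbosity argument and applies it through the log_levels mapping defined earlier in this file. A minimal sketch of the call pattern (logger names here are illustrative):

from opendelta.utils.logging import get_logger

logger = get_logger(__name__)  # defaults to verbosity='info'
logger.info("visible at the default level")

debug_logger = get_logger("opendelta.debug", verbosity="debug")
debug_logger.debug("visible because this logger was set to DEBUG")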

requirements.txt

@@ -1,8 +1,8 @@
torch>=1.9.0
transformers==4.10.0
datasets==1.17.0
sentencepiece==0.1.96
tqdm==4.62.2
torch>=1.8.0
transformers>=4.10.0
datasets>=1.17.0
sentencepiece>=0.1.96
tqdm>=4.62.2
loralib
decorator
rich

setup.py

@@ -17,7 +17,7 @@ print(requires)
with open('README.md', 'r') as f:
setuptools.setup(
name = 'opendelta',
version = '0.0.1',
version = '0.0.3',
description = "An open source framework for delta learning (parameter efficient learning).",
long_description=open("README.md", "r", encoding="utf-8").read(),
long_description_content_type="text/markdown",
@@ -26,7 +26,7 @@ with open('README.md', 'r') as f:
license="Apache",
url="https://github.com/thunlp/OpenDelta",
keywords = ['PLM', 'Parameter-efficient-Learning', 'AI', 'NLP'],
python_requires=">=3.8.0",
python_requires=">=3.6.0",
install_requires=requires,
packages=setuptools.find_packages(),
classifiers=[
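For context, the requires list printed at the top of setup.py feeds install_requires, which is why relaxing the pins in requirements.txt flows straight into the package metadata. A common way requires is populated (an assumption; the reading code sits above the hunk shown here):

# Assumption: requires is read from requirements.txt before setup() runs.
with open("requirements.txt") as f:
    requires = [line.strip() for line in f if line.strip()]
print(requires)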