commit
3be2d66df4
|
@ -19,6 +19,12 @@ log.txt
|
|||
**/DeltaHub/
|
||||
*beans/
|
||||
**/examples/*/configs/
|
||||
|
||||
!examples/*/configs/config_gen.py
|
||||
**/jupyter_notebook_examples/
|
||||
!examples/jupyter_notebook_examples/*.py
|
||||
|
||||
|
||||
!**/examples/*/configs/config_gen.py
|
||||
**/outputs_search/**/*.bin
|
||||
**/outputs_search/**/*.pt
|
||||
|
@ -28,4 +34,5 @@ log.txt
|
|||
**/examples/examples_bmtrain/BMTrain_stable
|
||||
**/examples/examples_bmtrain/BMPretrain
|
||||
**/examples/examples_bmtrain/BigModels/BigModels/results
|
||||
**/Delta_Memory/
|
||||
**/Delta_Memory/
|
||||
|
||||
|
|
|
@ -32,6 +32,8 @@ OpenDelta is a toolkit for parameter efficient methods (we dub it as *delta tuni
|
|||
![How PLM changes using Delta-tuning](docs/source/imgs/demo.gif)
|
||||
|
||||
## Updates
|
||||
- 2022.03.20 Added a [Colab example](https://colab.research.google.com/drive/1hM_zu11s6plpK-YQSkz3CrowJyxtHneY?usp=sharing) illustrating efficient training and space-saving multitask serving.
|
||||
- 2022.03.20 Released a new pip version.
|
||||
- 2022.02.16 Added support for [regular expressions](https://opendelta.readthedocs.io/en/latest/notes/namebasedaddr.html#regexexpr) in name-based addressing.
|
||||
|
||||
## Installation
|
||||
|
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
|
@ -48,6 +48,7 @@ BaseConfigs['t5-base'] = {
|
|||
"save_strategy": "steps"
|
||||
}
|
||||
|
||||
|
||||
BaseConfigs['t5-large'] = {
|
||||
("job_name", "task_name", "eval_dataset_name", "test_dataset_name", "num_train_epochs",
|
||||
"max_source_length",
|
||||
|
@ -107,6 +108,7 @@ BaseConfigs['t5-3b'] = {
|
|||
# [ 32, 32, 32, 32, 32, 16, 32] + [32] * 8,
|
||||
[ 32, 32, 32, 32, 32, 16, 32] + [8] * 8,
|
||||
[ 32, 32, 32, 32, 32, 16, 32] + [8] * 8,
|
||||
|
||||
[0] *7 +[0] *8,
|
||||
[200, 100, 50, 100, 200, 200, 100, 200, 100, 200, 200, 100, 200, 200, 100],
|
||||
[200, 100, 50, 100, 200, 200, 100, 200, 100, 200, 200, 100, 200, 200, 100],
|
||||
|
@ -114,7 +116,6 @@ BaseConfigs['t5-3b'] = {
|
|||
"do_train": True,
|
||||
"do_eval": True,
|
||||
"do_test": True,
|
||||
|
||||
"model_name_or_path": "/home/hushengding/plm_cache/t5-3b",
|
||||
"tokenizer_name": "/home/hushengding/plm_cache/t5-3b",
|
||||
"save_total_limit": 1,
|
||||
|
@ -255,6 +256,21 @@ AllConfigs['low_rank_adapter_t5-base'].update({
|
|||
"low_rank_rank": 1,
|
||||
})
|
||||
|
||||
AllConfigs['low_rank_adapter_t5-xxl'] = copy.deepcopy(BaseConfigs['t5-xxl'])
|
||||
AllConfigs['low_rank_adapter_t5-xxl'].update({
|
||||
"delta_type": "low_rank_adapter",
|
||||
"learning_rate": 3e-4,
|
||||
"unfrozen_modules": [
|
||||
"deltas",
|
||||
"layer_norm",
|
||||
"final_layer_norm"
|
||||
],
|
||||
"output_dir": "outputs/low_rank_adapter/t5-xxl/",
|
||||
"non_linearity": "gelu_new",
|
||||
"low_rank_w_init": "glorot-uniform",
|
||||
"low_rank_rank": 1,
|
||||
})
|
||||
|
||||
|
||||
AllConfigs['soft_prompt_t5-base'] = copy.deepcopy(BaseConfigs['t5-base'])
|
||||
AllConfigs['soft_prompt_t5-base'].update({
|
||||
|
|
|
@ -632,7 +632,7 @@ class DeltaBase(nn.Module, SaveLoadMixin):
|
|||
self.config = config
|
||||
|
||||
|
||||
def log(self, module=None, delta_ratio=True, trainable_ratio=True, visualization=True):
|
||||
def log(self, module=None, delta_ratio=True, trainable_ratio=True, visualization=True, cuda_memory=True):
|
||||
r"""Log and visualize the result of applying delta.
|
||||
Possible Options are ``trainable_ratio``,
|
||||
``visualization``, ``delta_ratio``.
|
||||
|
@ -658,6 +658,15 @@ class DeltaBase(nn.Module, SaveLoadMixin):
|
|||
n_delta = self.num_delta_parameters(module)
|
||||
n_total = self.num_total_parameters(module)
|
||||
logger.info("Delta Parameter Ratio: {:2f}%".format(n_delta/n_total*100))
|
||||
if cuda_memory:
|
||||
cudamem = 0
|
||||
maxcudamem = 0
|
||||
for device_id in range(torch.cuda.device_count()):
|
||||
cudamem += torch.cuda.memory_allocated(f"cuda:{device_id}")/1024**3
|
||||
maxcudamem += torch.cuda.max_memory_allocated(f"cuda:{device_id}")/1024**3
|
||||
logger.info("Static Memory {:.2f} GB, Max Memory {:.2f} GB".format(cudamem, maxcudamem))
|
||||
|
||||
|
||||
|
||||
def num_delta_parameters(self, module: Optional[nn.Module]=None):
|
||||
r"""[NODOC] A small sugar function to get the number of trainable parameter in the backbone model. Often used to
|
||||
|
@ -678,7 +687,7 @@ class DeltaBase(nn.Module, SaveLoadMixin):
|
|||
return pnum_tot
|
||||
|
||||
# Two functions for plug and remove the delta model.
|
||||
def attach(self, module: Optional[nn.Module]=None,):
|
||||
def attach(self, module: Optional[nn.Module]=None, reset_state_dict=True):
|
||||
r"""Reattach the delta modules to the backbone. Note that this method can not be used to create new delta modules.
|
||||
Instead, a :meth:`DeltaBase.detach` should precede this method.
|
||||
|
||||
|
@ -707,10 +716,13 @@ class DeltaBase(nn.Module, SaveLoadMixin):
|
|||
else:
|
||||
raise NotImplementedError
|
||||
|
||||
_delta_info['state'] = "on"
|
||||
_delta_info['state'] = "on"
|
||||
if reset_state_dict:
|
||||
self.set_active_state_dict(module)
|
||||
|
||||
|
||||
def detach(self, module: Optional[nn.Module]=None,):
|
||||
|
||||
def detach(self, module: Optional[nn.Module]=None, reset_state_dict=True):
|
||||
r"""Detach the delta module from the backbone. The delta module is not deleted, but temporarily turned off.
|
||||
Use :meth:`DeltaBase.attach` to reattach the delta model to the backbone.
|
||||
|
||||
|
@ -743,4 +755,10 @@ class DeltaBase(nn.Module, SaveLoadMixin):
|
|||
raise NotImplementedError
|
||||
|
||||
_delta_info['state'] = "off"
|
||||
if reset_state_dict:
|
||||
try:
|
||||
module.state_dict = module.state_dict.__wrapped__
|
||||
except AttributeError:
|
||||
pass
|
||||
|
||||
|
||||
|
|
|
@ -148,7 +148,7 @@ class LowRankAdapterModel(DeltaBase):
|
|||
"""
|
||||
|
||||
config_class = LowRankAdapterConfig
|
||||
delta_type = "lowrankadapter"
|
||||
delta_type = "low_rank_adapter"
|
||||
default_modified_modules = ['attn', 'ff']
|
||||
def __init__(self,
|
||||
backbone_model: nn.Module,
|
||||
|
|
|
@ -113,18 +113,6 @@ def get_log_levels_dict():
|
|||
return log_levels
|
||||
|
||||
|
||||
def get_logger(name: Optional[str] = None) -> logging.Logger:
|
||||
"""
|
||||
Return a logger with the specified name.
|
||||
This function is not supposed to be directly accessed unless you are writing a custom transformers module.
|
||||
"""
|
||||
|
||||
if name is None:
|
||||
name = _get_library_name()
|
||||
|
||||
_configure_library_root_logger()
|
||||
return logging.getLogger(name)
|
||||
|
||||
|
||||
def get_verbosity() -> int:
|
||||
"""
|
||||
|
@ -275,4 +263,17 @@ def warning_advice(self, *args, **kwargs):
|
|||
|
||||
logging.Logger.warning_advice = warning_advice
|
||||
|
||||
set_verbosity_debug()
|
||||
|
||||
def get_logger(name: Optional[str] = None, verbosity='info') -> logging.Logger:
|
||||
"""
|
||||
Return a logger with the specified name.
|
||||
This function is not supposed to be directly accessed unless you are writing a custom transformers module.
|
||||
"""
|
||||
|
||||
if name is None:
|
||||
name = _get_library_name()
|
||||
|
||||
_configure_library_root_logger()
|
||||
logger = logging.getLogger(name)
|
||||
logger.setLevel(log_levels[verbosity])
|
||||
return logger
|
||||
|
|
|
@ -1,8 +1,8 @@
|
|||
torch>=1.9.0
|
||||
transformers==4.10.0
|
||||
datasets==1.17.0
|
||||
sentencepiece==0.1.96
|
||||
tqdm==4.62.2
|
||||
torch>=1.8.0
|
||||
transformers>=4.10.0
|
||||
datasets>=1.17.0
|
||||
sentencepiece>=0.1.96
|
||||
tqdm>=4.62.2
|
||||
loralib
|
||||
decorator
|
||||
rich
|
||||
|
|
4
setup.py
4
setup.py
|
@ -17,7 +17,7 @@ print(requires)
|
|||
with open('README.md', 'r') as f:
|
||||
setuptools.setup(
|
||||
name = 'opendelta',
|
||||
version = '0.0.1',
|
||||
version = '0.0.3',
|
||||
description = "An open source framework for delta learning (parameter efficient learning).",
|
||||
long_description=open("README.md", "r", encoding="utf-8").read(),
|
||||
long_description_content_type="text/markdown",
|
||||
|
@ -26,7 +26,7 @@ with open('README.md', 'r') as f:
|
|||
license="Apache",
|
||||
url="https://github.com/thunlp/OpenDelta",
|
||||
keywords = ['PLM', 'Parameter-efficient-Learning', 'AI', 'NLP'],
|
||||
python_requires=">=3.8.0",
|
||||
python_requires=">=3.6.0",
|
||||
install_requires=requires,
|
||||
packages=setuptools.find_packages(),
|
||||
classifiers=[
|
||||
|
|
Loading…
Reference in New Issue