Merge pull request #1 from thunlp/main

Fetch
2022-03-21 21:36:32 +08:00 · 2022-03-21 21:36:32 +08:00 · 3be2d66df4
parent db4132e349 a14c19e8dc
commit 3be2d66df4
11 changed files with 71 additions and 27 deletions
--- a/.gitignore
+++ b/.gitignore
@ -19,6 +19,12 @@ log.txt
 **/DeltaHub/
 *beans/
 **/examples/*/configs/
+
+!examples/*/configs/config_gen.py
+**/jupyter_notebook_examples/
+!examples/jupyter_notebook_examples/*.py
+
+
 !**/examples/*/configs/config_gen.py
 **/outputs_search/**/*.bin
 **/outputs_search/**/*.pt
@ -28,4 +34,5 @@ log.txt
 **/examples/examples_bmtrain/BMTrain_stable
 **/examples/examples_bmtrain/BMPretrain
 **/examples/examples_bmtrain/BigModels/BigModels/results
-**/Delta_Memory/
+**/Delta_Memory/
+
--- a/README.md
+++ b/README.md
@ -32,6 +32,8 @@ OpenDelta is a toolkit for parameter efficient methods (we dub it as *delta tuni
 ![How PLM changes using Delta-tuning](docs/source/imgs/demo.gif)

 ## Updates
+- 2022.03.20 add a [colab example](https://colab.research.google.com/drive/1hM_zu11s6plpK-YQSkz3CrowJyxtHneY?usp=sharing) to illustrate efficient training and space-saving multitask-serving.
+- 2022.03.20 a new pip version released.
 - 2022.02.16 support [regular expression](https://opendelta.readthedocs.io/en/latest/notes/namebasedaddr.html#regexexpr) in name-based addressing.

 ## Installation
--- a/dist/opendelta-0.0.1.tar.gz
+++ b/dist/opendelta-0.0.1.tar.gz
--- a/dist/opendelta-0.0.3-py3-none-any.whl
+++ b/dist/opendelta-0.0.3-py3-none-any.whl
--- a/dist/opendelta-0.0.3.tar.gz
+++ b/dist/opendelta-0.0.3.tar.gz
--- a/examples/examples_seq2seq/configs/config_gen.py
+++ b/examples/examples_seq2seq/configs/config_gen.py
@ -48,6 +48,7 @@ BaseConfigs['t5-base'] = {
                "save_strategy": "steps"
            }

+
 BaseConfigs['t5-large'] = {
                ("job_name", "task_name", "eval_dataset_name", "test_dataset_name", "num_train_epochs", 
                "max_source_length",
@ -107,6 +108,7 @@ BaseConfigs['t5-3b'] = {
                    # [ 32,  32,  32,  32,  32,  16,  32] + [32] * 8,
                    [ 32,  32,  32,  32,  32,  16,  32] + [8] * 8,
                    [ 32,  32,  32,  32,  32,  16,  32] + [8] * 8,
+
                    [0] *7 +[0] *8,
                    [200, 100, 50, 100, 200, 200, 100, 200, 100, 200, 200, 100, 200, 200, 100],
                    [200, 100, 50, 100, 200, 200, 100, 200, 100, 200, 200, 100, 200, 200, 100],
@ -114,7 +116,6 @@ BaseConfigs['t5-3b'] = {
                "do_train": True,
                "do_eval": True,
                "do_test": True,
-                
                "model_name_or_path": "/home/hushengding/plm_cache/t5-3b",
                "tokenizer_name": "/home/hushengding/plm_cache/t5-3b",
                "save_total_limit": 1,
@ -255,6 +256,21 @@ AllConfigs['low_rank_adapter_t5-base'].update({
                                "low_rank_rank": 1,
                            })

+AllConfigs['low_rank_adapter_t5-xxl'] = copy.deepcopy(BaseConfigs['t5-xxl'])
+AllConfigs['low_rank_adapter_t5-xxl'].update({
+                                "delta_type": "low_rank_adapter",
+                                "learning_rate": 3e-4,
+                                "unfrozen_modules": [
+                                    "deltas",
+                                    "layer_norm",
+                                    "final_layer_norm"
+                                ],
+                                "output_dir": "outputs/low_rank_adapter/t5-xxl/",
+                                "non_linearity": "gelu_new",
+                                "low_rank_w_init": "glorot-uniform", 
+                                "low_rank_rank": 1,
+                            })
+

 AllConfigs['soft_prompt_t5-base'] = copy.deepcopy(BaseConfigs['t5-base'])
 AllConfigs['soft_prompt_t5-base'].update({
--- a/opendelta/basemodel.py
+++ b/opendelta/basemodel.py
@ -632,7 +632,7 @@ class DeltaBase(nn.Module, SaveLoadMixin):
        self.config = config
        
    
-    def log(self, module=None, delta_ratio=True, trainable_ratio=True, visualization=True):
+    def log(self, module=None, delta_ratio=True, trainable_ratio=True, visualization=True, cuda_memory=True):
        r"""Log and visualize the result of applying delta. 
        Possible Options are ``trainable_ratio``,
        ``visualization``, ``delta_ratio``, ``cuda_memory``.
@ -658,6 +658,15 @@ class DeltaBase(nn.Module, SaveLoadMixin):
            n_delta = self.num_delta_parameters(module)
            n_total = self.num_total_parameters(module)
            logger.info("Delta Parameter Ratio: {:2f}%".format(n_delta/n_total*100))
+        if cuda_memory:
+            cudamem = 0
+            maxcudamem = 0
+            for device_id in range(torch.cuda.device_count()):
+                cudamem += torch.cuda.memory_allocated(f"cuda:{device_id}")/1024**3
+                maxcudamem += torch.cuda.max_memory_allocated(f"cuda:{device_id}")/1024**3
+            logger.info("Static Memory {:.2f} GB, Max Memory {:.2f} GB".format(cudamem, maxcudamem))
+
+

    def num_delta_parameters(self, module: Optional[nn.Module]=None):
        r"""[NODOC] A small sugar function to get the number of trainable parameter in the backbone model. Often used to 
@ -678,7 +687,7 @@ class DeltaBase(nn.Module, SaveLoadMixin):
        return pnum_tot
        
    # Two functions for plug and remove the delta model.
-    def attach(self, module: Optional[nn.Module]=None,):
+    def attach(self, module: Optional[nn.Module]=None, reset_state_dict=True):
        r"""Reattach the delta modules to the backbone. Note that this method can not be used to create new delta modules.
        Instead, a :meth:`DeltaBase.detach` should precede this method. 

@ -707,10 +716,13 @@ class DeltaBase(nn.Module, SaveLoadMixin):
                    else:
                        raise NotImplementedError

-                    _delta_info['state'] = "on"     
+                    _delta_info['state'] = "on"   
+        if reset_state_dict:
+            self.set_active_state_dict(module)


-    def detach(self, module: Optional[nn.Module]=None,):
+
+    def detach(self, module: Optional[nn.Module]=None, reset_state_dict=True):
        r"""Detach the delta module from the backbone. The delta module is not deleted, but temporarily turned off.
        Use :meth:`DeltaBase.attach` to reattach the delta model to the backbone.

@ -743,4 +755,10 @@ class DeltaBase(nn.Module, SaveLoadMixin):
                        raise NotImplementedError
                    
                    _delta_info['state'] = "off"
+        if reset_state_dict:
+            try:
+                module.state_dict = module.state_dict.__wrapped__
+            except AttributeError:
+                pass
+        

--- a/opendelta/delta_models/low_rank_adapter.py
+++ b/opendelta/delta_models/low_rank_adapter.py
@ -148,7 +148,7 @@ class LowRankAdapterModel(DeltaBase):
    """

    config_class = LowRankAdapterConfig
-    delta_type = "lowrankadapter"
+    delta_type = "low_rank_adapter"
    default_modified_modules = ['attn', 'ff']
    def __init__(self,
                 backbone_model: nn.Module, 
--- a/opendelta/utils/logging.py
+++ b/opendelta/utils/logging.py
@ -113,18 +113,6 @@ def get_log_levels_dict():
    return log_levels


-def get_logger(name: Optional[str] = None) -> logging.Logger:
-    """
-    Return a logger with the specified name.
-    This function is not supposed to be directly accessed unless you are writing a custom transformers module.
-    """
-
-    if name is None:
-        name = _get_library_name()
-
-    _configure_library_root_logger()
-    return logging.getLogger(name)
-

 def get_verbosity() -> int:
    """
@ -275,4 +263,17 @@ def warning_advice(self, *args, **kwargs):

 logging.Logger.warning_advice = warning_advice

-set_verbosity_debug()
+
+def get_logger(name: Optional[str] = None, verbosity='info') -> logging.Logger:
+    """
+    Return a logger with the specified name, with its level set to ``log_levels[verbosity]``.
+    This function is not supposed to be directly accessed unless you are writing a custom transformers module.
+    """
+
+    if name is None:
+        name = _get_library_name()
+
+    _configure_library_root_logger()
+    logger = logging.getLogger(name)
+    logger.setLevel(log_levels[verbosity])
+    return logger
--- a/requirements.txt
+++ b/requirements.txt
@ -1,8 +1,8 @@
-torch>=1.9.0
-transformers==4.10.0
-datasets==1.17.0
-sentencepiece==0.1.96
-tqdm==4.62.2
+torch>=1.8.0
+transformers>=4.10.0
+datasets>=1.17.0
+sentencepiece>=0.1.96
+tqdm>=4.62.2
 loralib
 decorator
 rich
--- a/setup.py
+++ b/setup.py
@ -17,7 +17,7 @@ print(requires)
 with open('README.md', 'r') as f:
    setuptools.setup(
        name = 'opendelta',
-        version = '0.0.1',
+        version = '0.0.3',
        description = "An open source framework for delta learning (parameter efficient learning).",
        long_description=open("README.md", "r", encoding="utf-8").read(),
        long_description_content_type="text/markdown",
@ -26,7 +26,7 @@ with open('README.md', 'r') as f:
        license="Apache",
        url="https://github.com/thunlp/OpenDelta",
        keywords = ['PLM', 'Parameter-efficient-Learning', 'AI', 'NLP'],
-        python_requires=">=3.8.0",
+        python_requires=">=3.6.0",
        install_requires=requires,
        packages=setuptools.find_packages(),
        classifiers=[