merge parallel
parent 7eea0cb94e
commit 266a00e390
@@ -52,6 +52,7 @@ extensions = [
    'sphinx.ext.autosummary',
    'sphinx.ext.doctest',
    'sphinx.ext.intersphinx',
    # 'sphinx.ext.mathbase',
    'sphinx.ext.mathjax',
    'sphinx.ext.napoleon',
    'sphinx.ext.viewcode',
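For context on the extension list above: 'sphinx.ext.intersphinx' only does something useful when conf.py also defines an intersphinx_mapping. A minimal sketch of such an entry (the mapping targets below are assumptions, not part of this commit):

# conf.py (sketch) -- assumed intersphinx targets; adjust to the projects this documentation actually links to.
intersphinx_mapping = {
    'python': ('https://docs.python.org/3', None),
    'torch': ('https://pytorch.org/docs/stable/', None),
}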
@@ -38,7 +38,8 @@
     "tokenizer_name": "roberta-base",
     "unfrozen_modules": [
         "classifier",
-        "deltas"
+        "deltas",
+        "layer_norm"
     ],
     "warmup_ratio": 0.06,
     "weight_decay": 0.1,
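This hunk keeps layer-norm parameters trainable in addition to the classifier head and the delta modules. Typically, "unfrozen_modules" lists name fragments whose parameters stay trainable while the rest of the backbone is frozen; a minimal sketch of that behaviour under that assumption (the helper and the toy model are illustrative, not the project's actual API):

import torch.nn as nn

def freeze_except(model: nn.Module, unfrozen_modules):
    """Freeze every parameter, then re-enable those whose dotted name contains an unfrozen key."""
    for name, param in model.named_parameters():
        param.requires_grad = any(key in name for key in unfrozen_modules)

# Toy stand-in for a backbone with delta modules attached; the real model here is roberta-base.
toy = nn.ModuleDict({
    "encoder": nn.Linear(8, 8),
    "deltas": nn.Linear(8, 8),
    "layer_norm": nn.LayerNorm(8),
    "classifier": nn.Linear(8, 2),
})
freeze_except(toy, ["classifier", "deltas", "layer_norm"])
# After this call only the encoder.* parameters remain frozen.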
@@ -66,8 +66,13 @@ class LoraConfig(BaseDeltaConfig):
 
 class LoraModel(DeltaBase):
     r""" The implementation of `LoRA: Low-Rank Adaptation of Large Language Models <https://arxiv.org/abs/2106.09685>`_ .
-    Thanks for their `loralib <https://github.com/microsoft/LoRA/tree/main/loralib>`_, we use loralib.linear
-    to replace the linear layer of the backbone model.
+    Thanks for their `loralib <https://github.com/microsoft/LoRA/tree/main/loralib>`_.
+
+    .. note::
+        In our implementation, we did not use loralib.linear to replace the linear layer of the backbone model.
+        Instead, we insert a parallel module into the backbone.
+        In other words, we treat :math:`(W + A^TB) X` as :math:`WX + A^TBX`, and insert the :math:`A^TBX` as a parallel insertion module.
+        If you want to use the original implementation, please refer to `lora_old.py`
 
     class attributes:
         - default_modified_modules = ['attn.q', 'attn.v'] According to the paper, they modify q and v matrix in the
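The note in this docstring is the substance of the change: the low-rank update is no longer merged into the backbone's linear layer but computed alongside it, so :math:`(W + A^TB)X` becomes :math:`WX + A^TBX`. A minimal sketch of that parallel insertion (class names, dimensions, and the scaling convention are assumptions, not the project's actual implementation):

import torch
import torch.nn as nn

class ParallelLowRank(nn.Module):
    """Low-rank branch computed in parallel with a frozen linear layer: y = Wx + B(Ax)."""
    def __init__(self, in_features, out_features, r=8, alpha=16):
        super().__init__()
        self.lora_A = nn.Linear(in_features, r, bias=False)   # down-projection
        self.lora_B = nn.Linear(r, out_features, bias=False)  # up-projection
        nn.init.zeros_(self.lora_B.weight)                     # zero-init so the parallel branch starts as a no-op
        self.scaling = alpha / r

    def forward(self, x):
        return self.lora_B(self.lora_A(x)) * self.scaling

frozen = nn.Linear(768, 768)   # stands in for the backbone's attn.q / attn.v projection, kept frozen
delta = ParallelLowRank(768, 768)

x = torch.randn(2, 768)
y = frozen(x) + delta(x)       # (W + A^T B)x computed as Wx + A^T Bx, i.e. the parallel insertion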