diff --git a/docs/source/conf.py b/docs/source/conf.py
index 8408041..e2f01a3 100644
--- a/docs/source/conf.py
+++ b/docs/source/conf.py
@@ -52,6 +52,7 @@ extensions = [
     'sphinx.ext.autosummary',
     'sphinx.ext.doctest',
     'sphinx.ext.intersphinx',
+    # 'sphinx.ext.mathbase',
     'sphinx.ext.mathjax',
     'sphinx.ext.napoleon',
     'sphinx.ext.viewcode',
diff --git a/examples/examples_text-classification/configs/lora_roberta-base/lora_mrpc.json b/examples/examples_text-classification/configs/lora_roberta-base/lora_mrpc.json
index 46afef5..10eeb38 100644
--- a/examples/examples_text-classification/configs/lora_roberta-base/lora_mrpc.json
+++ b/examples/examples_text-classification/configs/lora_roberta-base/lora_mrpc.json
@@ -38,7 +38,8 @@
     "tokenizer_name": "roberta-base",
     "unfrozen_modules": [
         "classifier",
-        "deltas"
+        "deltas",
+        "layer_norm"
     ],
     "warmup_ratio": 0.06,
     "weight_decay": 0.1,
diff --git a/opendelta/delta_models/lora.py b/opendelta/delta_models/lora.py
index 492fea6..02ebe85 100644
--- a/opendelta/delta_models/lora.py
+++ b/opendelta/delta_models/lora.py
@@ -66,8 +66,13 @@ class LoraConfig(BaseDeltaConfig):
 class LoraModel(DeltaBase):
     r"""
     The implementation of `LoRA: Low-Rank Adaptation of Large Language Models <https://arxiv.org/abs/2106.09685>`_ .
-    Thanks for their `loralib <https://github.com/microsoft/LoRA>`_, we use loralib.linear
-    to replace the linear layer of the backbone model.
+    Thanks for their `loralib <https://github.com/microsoft/LoRA>`_.
+
+    .. note::
+        In our implementation, we did not use loralib.linear to replace the linear layer of the backbone model.
+        Instead, we insert a parallel module into the backbone.
+        In other words, we treat :math:`(W + A^TB) X` as :math:`WX + A^TBX`, and insert the :math:`A^TBX` as a parallel insertion module.
+        If you want to use the original implementation, please refer to `lora_old.py`.

     class attributes:
         - default_modified_modules = ['attn.q', 'attn.v'] According to the paper, they modify q and v matrix in the