diff --git a/.gitignore b/.gitignore
index 783241d..e789812 100644
--- a/.gitignore
+++ b/.gitignore
@@ -57,3 +57,4 @@
 t.sh
 unittest/outputs/
 unittest/tmp/
+**/tmp/
diff --git a/README.md b/README.md
index 60b0789..31c85ae 100644
--- a/README.md
+++ b/README.md
@@ -83,16 +83,81 @@ pip install -r requirements.txt && python setup.py develop
 ```
 
 ## Must Try
+The following code and comments walk you through the key functionality of OpenDelta.
 ```python
-from transformers import AutoModelForSeq2SeqLM
+# use transformers as usual.
+from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
 t5 = AutoModelForSeq2SeqLM.from_pretrained("t5-large")
-from opendelta import AutoDeltaModel
-delta = AutoDeltaModel.from_finetuned("thunlp/FactQA_T5-large_Adapter", backbone_model=t5)
+t5_tokenizer = AutoTokenizer.from_pretrained("t5-large")
+input_ids = t5_tokenizer.encode("Is Harry Poter wrtten by JKrowling", return_tensors="pt")  # deliberately misspelled input for the spelling-correction demo
+t5_tokenizer.decode(t5.generate(input_ids)[0])
+# >>> '? Is it Harry Potter?'
+
+
+# use existing delta models
+from opendelta import AutoDeltaModel, AutoDeltaConfig
+# load an existing delta model from DeltaCenter
+delta = AutoDeltaModel.from_finetuned("thunlp/Spelling_Correction_T5_LRAdapter_demo", backbone_model=t5)
+# freeze the backbone model except for the delta modules.
+delta.freeze_module()
+# see the change: delta.log()
+
+
+t5_tokenizer.decode(t5.generate(input_ids)[0])
+# >>> Is Harry Potter written by JK Rowling?
+
+
+# now save only the delta model, not the whole backbone model, to ".tmp".
+delta.save_finetuned(".tmp")
+# >>> The state dict size is 1.443 MB
+# >>> We encourage users to push their final and public models to delta center to share them with the community!
+
+
+# reload the delta model from the local path and attach it to a freshly loaded pre-trained T5.
+t5 = AutoModelForSeq2SeqLM.from_pretrained("t5-large")
+delta1 = AutoDeltaModel.from_finetuned(".tmp", backbone_model=t5)
+import shutil; shutil.rmtree(".tmp")  # don't forget to remove the temporary files.
+t5_tokenizer.decode(t5.generate(input_ids)[0])
+# >>> Is Harry Potter written by JK Rowling?
+
+# detach the delta model; the backbone returns to its unmodified state.
+delta1.detach()
+t5_tokenizer.decode(t5.generate(input_ids)[0])
+# >>> '? Is it Harry Potter?'
+
+# use the default configuration for customized wrapped models which have PLMs inside.
+import torch.nn as nn
+class WrappedModel(nn.Module):
+    def __init__(self, inner_model):
+        super().__init__()
+        self.inner = inner_model
+    def forward(self, *args, **kwargs):
+        return self.inner(*args, **kwargs)
+
+wrapped_model = WrappedModel(WrappedModel(t5))  # wrap twice to show nested submodules
+
+# say we use LoRA
+delta_config = AutoDeltaConfig.from_dict({"delta_type":"lora"})
+delta2 = AutoDeltaModel.from_config(delta_config, backbone_model=wrapped_model)
+delta2.log()
+# >>> root
+# -- inner
+# -- inner
+# ...
+# ... lora_A:[8,1024], lora_B:[1024,8]
+delta2.detach()
+
+# use a non-default configuration:
+# say we add LoRA to the last four layers of the decoder of t5, with LoRA rank 5.
+delta_config3 = AutoDeltaConfig.from_dict({"delta_type":"lora", "modified_modules":["[r]decoder.*((20)|(21)|(22)|(23)).*DenseReluDense\\.wi"], "lora_r":5})
+delta3 = AutoDeltaModel.from_config(delta_config3, backbone_model=wrapped_model)
+delta3.log()
+
 ```
 
-## Verified Supported Models
+## Verified Default Configurations
 
 - **You can try to use OpenDelta on *any* backbone models based on PyTorch.**
 - However, there is a small chance that the interface of the submodules of the backbone model is not supported. Therefore we verified some commonly