Merge branch 'delta_center_dev' into check_pr_33

2022-10-10 13:06:38 +08:00 · 2022-10-10 13:06:38 +08:00 · f6f3b01916
parent 2351259ecd f8cb2d3138
commit f6f3b01916
111 changed files with 2529 additions and 776 deletions
--- a/.gitignore
+++ b/.gitignore
@ -35,4 +35,21 @@ log.txt
 **/examples/examples_bmtrain/BMPretrain
 **/examples/examples_bmtrain/BigModels/BigModels/results
 **/Delta_Memory/
+**/output/
+**/thunlp/
+**/saved_ckpts/

+
+DeltaCenter-Python-Client/
+backbone_structure
+delta_checkpoints
+gitop.sh
+load_dataset_and_model.ipynb
+load_model.py
+scripts
+t.py
+t.sh
+!examples/examples_prompt/configs/*/*.json
+!examples/examples_prompt/configs/**
+**/delta_checkpoints/
+**/outputs/
--- a/README.md
+++ b/README.md
@ -72,6 +72,11 @@ python setup.py install
 python setup.py develop
 ```

+If you encounter a network error when running setup.py, please install the dependencies first via
+```shell
+pip install -r requirements.txt && python setup.py develop
+```
+
 ## Must Try

 ```python
--- a/dist/opendelta-0.2.0-py3-none-any.whl
+++ b/dist/opendelta-0.2.0-py3-none-any.whl
--- a/dist/opendelta-0.2.0.tar.gz
+++ b/dist/opendelta-0.2.0.tar.gz
--- a/dist/opendelta-0.2.1-py3-none-any.whl
+++ b/dist/opendelta-0.2.1-py3-none-any.whl
--- a/dist/opendelta-0.2.1.tar.gz
+++ b/dist/opendelta-0.2.1.tar.gz
--- a/dist/opendelta-0.2.2-py3-none-any.whl
+++ b/dist/opendelta-0.2.2-py3-none-any.whl
--- a/dist/opendelta-0.2.2.tar.gz
+++ b/dist/opendelta-0.2.2.tar.gz
--- a/dist/opendelta-0.2.3-py3-none-any.whl
+++ b/dist/opendelta-0.2.3-py3-none-any.whl
--- a/dist/opendelta-0.2.3.tar.gz
+++ b/dist/opendelta-0.2.3.tar.gz
--- a/dist/opendelta-0.2.4-py3-none-any.whl
+++ b/dist/opendelta-0.2.4-py3-none-any.whl
--- a/dist/opendelta-0.2.4.tar.gz
+++ b/dist/opendelta-0.2.4.tar.gz
--- a/examples/examples_prompt/README.md
+++ b/examples/examples_prompt/README.md
@ -1,24 +1,59 @@
-# !!!!This example collection is still under develop, please wait for some time to use it.
+# Examples of using opendelta together with 🤗 transformers.

-## install the repo
+In this repo, we construct a very general pipeline to train and test a PLM using
+🤗 transformers.
+
+The pipeline was constructed together with [openpromptu](https://pypi.org/project/openpromptu/), which is a light and
+model-agnostic version of [openprompt](https://github.com/thunlp/OpenPrompt).
+
+## Pool of PLMs
+We are going to adapt most of the models in 🤗 transformers
+in this repo. The model-specific pipelines, processing, and configurations are specified
+in `./backbones/`. You can add your own backbone file to this directory to support customized models.
+
+
+### An example script to run the repo in offline mode
 ```bash
-cd ../
-python setup_seq2seq.py develop
+conda activate [YOURENV]
+PATHBASE=[YOURPATH]
+
+JOBNAME="adapter_t5-base"
+DATASET="superglue-cb"
+
+cd $PATHBASE/OpenDelta/examples/examples_prompt/
+python configs/gen_t5.py --job $JOBNAME
+
+export TRANSFORMERS_OFFLINE=1
+export HF_DATASETS_OFFLINE=1
+python src/run.py configs/$JOBNAME/$DATASET.json \
+--model_name_or_path [YOURPATH_TO_T5_BASE] \
+--tokenizer_name [YOURPATH_TO_T5_BASE] \
+--datasets_saved_path [YOURPATH_TO_CB_DATASETS] \
+--finetuned_delta_path ${PATHBASE}/delta_checkpoints/ \
+--num_train_epochs 20 \
+--bottleneck_dim 24 \
+--delay_push True
 ```
-This will add `examples_seq2seq` to the environment path of the python lib.

-## Generating the json configuration file
+## An example of quickly testing the repo

-```shell
-python configs/gen_$BACKBONETYPE.py --job $YOURJOB
-#e.g. python configs/gen_beit.py --job lora_beit-base-patch16-224
-```
-The available job configuration (e.g., `--job lora_beit-base-patch16-224`) can be seen from the scripts. You can also
-create your only configuration.
+```bash
+conda activate [YOURENV]
+PATHBASE=[YOURPATH]

+JOBNAME="adapter_t5-base"
+DATASET="superglue-cb"

-## Run the code
+cd $PATHBASE/OpenDelta/examples/examples_prompt/

-```
-CUDA_VISIBLE_DEVICES=1 python src/run.py configs/lora_beit-base-patch16-224/beans.json
-```
+export TRANSFORMERS_OFFLINE=1
+export HF_DATASETS_OFFLINE=1
+export DELTACENTER_OFFLINE=0
+python src/test.py configs/$JOBNAME/$DATASET.json \
+--model_name_or_path [YOURPATH_TO_T5_BASE] \
+--tokenizer_name [YOURPATH_TO_T5_BASE] \
+--datasets_saved_path [YOURPATH_TO_CB_DATASETS] \
+--finetuned_delta_path thunlp/t5-base_adapter_superglue-cb_20220701171436c80 \
+--delta_cache_dir "./delta_checkpoints/" \
+--force_download True
+```
--- a/examples/examples_prompt/backbones/bart.py
+++ b/examples/examples_prompt/backbones/bart.py
@ -26,14 +26,14 @@ def preprocess_function(raw_example, **kwargs):
    example = InputExample(**raw_example)


-    try:
-        example = verbalizer.wrap_one_example(example)
-        example, other = template.wrap_one_example(example)
-        input_sentence = tokenizer_wrapper.merge_wrapped_example(example)
-        model_inputs = tokenizer(input_sentence, max_length=256,
-                            padding="max_length", truncation=True)
-    except:
-        from IPython import embed; embed(header="Therer")
+
+    example = verbalizer.wrap_one_example(example)
+    example, other = template.wrap_one_example(example)
+    input_sentence = tokenizer_wrapper.merge_wrapped_example(example)
+    model_inputs = tokenizer(input_sentence, max_length=256,
+                        padding="max_length", truncation=True)
+
+

    with tokenizer.as_target_tokenizer():
        label = tokenizer(other['tgt_text']).input_ids
@ -43,7 +43,8 @@ def preprocess_function(raw_example, **kwargs):

 def get_backbone(model_args, **kwargs):
    config = AutoConfig.from_pretrained(
-        model_args.config_name if model_args.config_name else model_args.model_name_or_path,
+        # model_args.config_name if model_args.config_name else model_args.model_name_or_path,
+        model_args.model_name_or_path,
        cache_dir=model_args.cache_dir,
        revision=model_args.model_revision,
        use_auth_token=True if model_args.use_auth_token else None,
--- a/examples/examples_prompt/backbones/beit.py
+++ b/examples/examples_prompt/backbones/beit.py
@ -8,7 +8,6 @@ from transformers import (
    AutoFeatureExtractor,
    AutoModelForImageClassification,
 )
-from transformers import ViTFeatureExtractor

 from transformers import Trainer as HfTrainer
 import torch.nn as nn
@ -26,9 +25,10 @@ def get_prompts(task, tokenizer, data_args, template_id="0", verbalizer_id="0"):
 def preprocess_function(raw_example, **kwargs):
    # from IPython import embed; embed(header="Therefa")
    tokenizer = kwargs['tokenizer']
-    model_inputs = tokenizer(raw_example['image'], return_tensors='pt')
+    # print(np.array(raw_example['img']).shape)
+    model_inputs = tokenizer(np.array(raw_example['image']), return_tensors='pt')
    model_inputs['pixel_values'] = model_inputs['pixel_values'].squeeze()
-    model_inputs['labels'] = raw_example['labels']
+    model_inputs['labels'] = raw_example['label']
    return model_inputs

 def compute_metrics(eval_preds, dataset_name, eval_metric):
@ -55,7 +55,7 @@ def mask_token_func(tokenizer, ith_mask=0):

 def get_remove_columns(dataset_features):
    # dataset_features.pop("label")
-    print("remove_columns: {}".format(dataset_features))
+    # print("remove_columns: {}".format(dataset_features))
    return dataset_features

 class DataCollator(HfDataCollatorMixin):
--- a/examples/examples_prompt/backbones/bigbird_.py
+++ b/examples/examples_prompt/backbones/bigbird_.py
@ -0,0 +1,169 @@
+from openpromptu.data_utils import InputExample
+import torch
+from transformers.data.data_collator import torch_default_data_collator
+from transformers.data.data_collator import DataCollatorMixin as HfDataCollatorMixin
+from transformers.data.data_collator import DataCollatorForSeq2Seq as DataCollator
+import numpy as np
+from transformers import (
+    AutoConfig,
+    AutoModelForCausalLM,
+    AutoTokenizer,
+)
+
+from transformers import Seq2SeqTrainer as HfSeq2SeqTrainer
+import copy
+from torch.nn import CrossEntropyLoss
+
+def preprocess_function(raw_example, **kwargs):
+    tokenizer = kwargs['tokenizer']
+    data_args = kwargs['data_args']
+    template = kwargs['template']
+    verbalizer = kwargs['verbalizer']
+    tokenizer_wrapper = kwargs['tokenizer_wrapper']
+
+    example = InputExample(**raw_example)
+    # example = verbalizer.wrap_one_example(example)
+    example, other = template.wrap_one_example(example)
+    input_sentence = tokenizer_wrapper.merge_wrapped_example(example)
+    model_inputs = tokenizer(input_sentence, max_length=data_args.max_source_length,
+                        padding="max_length", truncation=True)
+    return model_inputs
+    
+
+
+def compute_metrics(eval_preds, dataset_name, eval_metric):
+    pass
+
+def mask_token_func(tokenizer, ith_mask=0):
+    return tokenizer.pad_token
+
+def get_remove_columns(dataset_features):
+    # dataset_features.remove("label")
+    return dataset_features
+
+def get_prompts(task, tokenizer, data_args, template_id="0", verbalizer_id="0"):
+    from openpromptu.prompts import GenerationVerbalizer
+    from openpromptu.prompts import ManualTemplate
+    from openpromptu import TokenizerWrapper
+    template = ManualTemplate(text = task.templates_text[template_id])
+    verbalizer = GenerationVerbalizer(tokenizer=tokenizer, classes = None, label_words=None)
+    tokenizer_wrapper = TokenizerWrapper(max_seq_length=data_args.max_source_length, tokenizer=tokenizer, truncate_method="balanced", mask_token_func=mask_token_func)
+    return template, verbalizer, tokenizer_wrapper
+
+
+def get_backbone(model_args, **kwargs):
+    config = AutoConfig.from_pretrained(
+        model_args.config_name if model_args.config_name else model_args.model_name_or_path,
+        cache_dir=model_args.cache_dir,
+        revision=model_args.model_revision,
+        use_auth_token=True if model_args.use_auth_token else None,
+    )
+    # config.dropout_rate = 0.0
+    tokenizer = AutoTokenizer.from_pretrained(
+        model_args.tokenizer_name if model_args.tokenizer_name else model_args.model_name_or_path,
+        cache_dir=model_args.cache_dir,
+        use_fast=model_args.use_fast_tokenizer,
+        revision=model_args.model_revision,
+        use_auth_token=True if model_args.use_auth_token else None,
+    )
+
+
+    model = AutoModelForCausalLM.from_pretrained(
+        model_args.model_name_or_path,
+        from_tf=bool(".ckpt" in model_args.model_name_or_path),
+        config=config,
+        cache_dir=model_args.cache_dir,
+        revision=model_args.model_revision,
+        use_auth_token=True if model_args.use_auth_token else None,
+        )
+    return config, tokenizer, model
+
+class Trainer(HfSeq2SeqTrainer):
+    def __init__(self, verbalizer=None, eval_task=None, **kwargs):
+        super().__init__(**kwargs)
+        self.eval_task = eval_task
+        self.compute_metrics = self._compute_metrics
+
+    def compute_loss(self, model, inputs, return_outputs=False):
+
+        labels=copy.deepcopy(inputs['input_ids'])
+        # labels[labels==self.tokenizer.pad_token_id]=-100
+        outputs = model(**inputs)
+        logits = outputs.logits
+        shift_logits = logits[..., :-1, :].contiguous()
+        shift_labels = labels[..., 1:].contiguous()
+        loss_fct = CrossEntropyLoss(ignore_index=self.tokenizer.pad_token_id)
+        loss = loss_fct(shift_logits.view(-1, shift_logits.shape[-1]), shift_labels.long().view(-1))
+
+        return (loss, outputs) if return_outputs else loss
+
+    def prediction_step(
+        self,
+        model, #nn.Module,
+        inputs, #Dict[str, Union[torch.Tensor, Any]],
+        prediction_loss_only, #: bool,
+        ignore_keys, #: Optional[List[str]] = None,
+    ): #-> Tuple[Optional[float], Optional[torch.Tensor], Optional[torch.Tensor]]:
+        """
+        Perform an evaluation step on :obj:`model` using obj:`inputs`.
+
+        Subclass and override to inject custom behavior.
+
+        Args:
+            model (:obj:`nn.Module`):
+                The model to evaluate.
+            inputs (:obj:`Dict[str, Union[torch.Tensor, Any]]`):
+                The inputs and targets of the model.
+
+                The dictionary will be unpacked before being fed to the model. Most models expect the targets under the
+                argument :obj:`labels`. Check your model's documentation for all accepted arguments.
+            prediction_loss_only (:obj:`bool`):
+                Whether or not to return the loss only.
+
+        Return:
+            Tuple[Optional[float], Optional[torch.Tensor], Optional[torch.Tensor]]: A tuple with the loss, logits and
+            labels (each being optional).
+        """
+        if not self.args.predict_with_generate or prediction_loss_only:
+            return super().prediction_step(
+                model, inputs, prediction_loss_only=prediction_loss_only, ignore_keys=ignore_keys
+            )
+
+        inputs = self._prepare_inputs(inputs)
+        with torch.no_grad():
+            labels=copy.deepcopy(inputs['input_ids'])
+            # labels[labels==self.tokenizer.pad_token_id]=-100
+            outputs = model(**inputs)
+            logits = outputs.logits
+            shift_logits = logits[..., :-1, :].contiguous()
+            shift_labels = labels[..., 1:].contiguous().long()
+            loss_fct = CrossEntropyLoss(ignore_index=self.tokenizer.pad_token_id)
+            loss = loss_fct(shift_logits.view(-1, shift_logits.shape[-1]), shift_labels.view(-1)).detach().cpu()
+            loss = torch.where(torch.isnan(loss), torch.full_like(loss, 0), loss)
+            
+        if prediction_loss_only:
+            return (loss, None, None)
+        else:
+            # non pad label
+            shift_labels = shift_labels.view(-1).detach().cpu()
+            nonpad_idx = shift_labels!=self.tokenizer.pad_token_id
+            shift_labels = shift_labels[nonpad_idx]
+            # the probability at the corresponding position
+            shift_logits = shift_logits.view(-1, shift_logits.shape[-1])[nonpad_idx].detach().cpu()
+            target_position = torch.nn.functional.one_hot(shift_labels,shift_logits.shape[-1]).bool().to(shift_labels.device)
+            shift_logits = shift_logits.softmax(dim=-1)[target_position]
+
+
+            return (loss, shift_logits, shift_labels)
+
+    def _compute_metrics(self, eval_preds):
+
+        preds, labels = eval_preds
+
+        result = {}
+        for metric in self.eval_task.metric:
+            result.update(metric(preds, labels,ignore_index=self.tokenizer.pad_token_id))
+
+        average_metric = sum(result.values())/len(result)
+        result.update({"average_metrics":average_metric})
+        return result
--- a/examples/examples_prompt/backbones/blenderbot.py
+++ b/examples/examples_prompt/backbones/blenderbot.py
@ -26,14 +26,13 @@ def preprocess_function(raw_example, **kwargs):
    example = InputExample(**raw_example)


-    try:
-        example = verbalizer.wrap_one_example(example)
-        example, other = template.wrap_one_example(example)
-        input_sentence = tokenizer_wrapper.merge_wrapped_example(example)
-        model_inputs = tokenizer(input_sentence, max_length=data_args.max_source_length,
-                            padding="max_length", truncation=True)
-    except:
-        from IPython import embed; embed(header="Therer")
+   
+    example = verbalizer.wrap_one_example(example)
+    example, other = template.wrap_one_example(example)
+    input_sentence = tokenizer_wrapper.merge_wrapped_example(example)
+    model_inputs = tokenizer(input_sentence, max_length=data_args.max_source_length,
+                        padding="max_length", truncation=True)
+

    with tokenizer.as_target_tokenizer():
        label = tokenizer(other['tgt_text']).input_ids
@ -165,7 +164,7 @@ class Trainer(HfSeq2SeqTrainer):
        return (loss, generated_tokens, labels)

    def _compute_metrics(self, eval_preds):
-        from IPython import embed; embed(header="In compute metrics")
+        # from IPython import embed; embed(header="In compute metrics")
        preds, labels = eval_preds
        decoded_preds = self.tokenizer.batch_decode(preds, skip_special_tokens=True)
        decoded_labels = self.tokenizer.batch_decode(labels, skip_special_tokens=True)
--- a/examples/examples_prompt/backbones/opt.py
+++ b/examples/examples_prompt/backbones/opt.py
@ -0,0 +1,171 @@
+from openpromptu.data_utils import InputExample
+import torch
+from transformers.data.data_collator import torch_default_data_collator
+from transformers.data.data_collator import DataCollatorMixin as HfDataCollatorMixin
+from transformers.data.data_collator import DataCollatorForSeq2Seq as DataCollator
+import numpy as np
+from transformers import (
+    AutoConfig,
+    AutoModelForCausalLM,
+    AutoTokenizer,
+)
+
+from transformers import Seq2SeqTrainer as HfSeq2SeqTrainer
+import copy
+from torch.nn import CrossEntropyLoss
+
+def preprocess_function(raw_example, **kwargs):
+    tokenizer = kwargs['tokenizer']
+    data_args = kwargs['data_args']
+    template = kwargs['template']
+    verbalizer = kwargs['verbalizer']
+    tokenizer_wrapper = kwargs['tokenizer_wrapper']
+
+    example = InputExample(**raw_example)
+    # example = verbalizer.wrap_one_example(example)
+    example, other = template.wrap_one_example(example)
+    input_sentence = tokenizer_wrapper.merge_wrapped_example(example)
+    model_inputs = tokenizer(input_sentence, max_length=data_args.max_source_length,
+                        padding="max_length", truncation=True)
+    return model_inputs
+    
+
+
+def compute_metrics(eval_preds, dataset_name, eval_metric):
+    pass
+
+def mask_token_func(tokenizer, ith_mask=0):
+    return tokenizer.pad_token
+
+def get_remove_columns(dataset_features):
+    # dataset_features.remove("label")
+    return dataset_features
+
+def get_prompts(task, tokenizer, data_args, template_id="0", verbalizer_id="0"):
+    from openpromptu.prompts import GenerationVerbalizer
+    from openpromptu.prompts import ManualTemplate
+    from openpromptu import TokenizerWrapper
+    template = ManualTemplate(text = task.templates_text[template_id])
+    verbalizer = GenerationVerbalizer(tokenizer=tokenizer, classes = None, label_words=None)
+    tokenizer_wrapper = TokenizerWrapper(max_seq_length=data_args.max_source_length, tokenizer=tokenizer, truncate_method="tail", mask_token_func=mask_token_func)
+    return template, verbalizer, tokenizer_wrapper
+
+
+def get_backbone(model_args, **kwargs):
+    config = AutoConfig.from_pretrained(
+        model_args.config_name if model_args.config_name else model_args.model_name_or_path,
+        cache_dir=model_args.cache_dir,
+        revision=model_args.model_revision,
+        use_auth_token=True if model_args.use_auth_token else None,
+    )
+    # config.dropout_rate = 0.0
+    tokenizer = AutoTokenizer.from_pretrained(
+        model_args.tokenizer_name if model_args.tokenizer_name else model_args.model_name_or_path,
+        cache_dir=model_args.cache_dir,
+        use_fast=model_args.use_fast_tokenizer,
+        revision=model_args.model_revision,
+        use_auth_token=True if model_args.use_auth_token else None,
+    )
+    if not hasattr(tokenizer,"pad_token") or (hasattr(tokenizer,"pad_token") and tokenizer.pad_token==None):
+        tokenizer.pad_token = tokenizer.eos_token
+
+    model = AutoModelForCausalLM.from_pretrained(
+        model_args.model_name_or_path,
+        from_tf=bool(".ckpt" in model_args.model_name_or_path),
+        config=config,
+        cache_dir=model_args.cache_dir,
+        revision=model_args.model_revision,
+        use_auth_token=True if model_args.use_auth_token else None,
+        )
+
+    return config, tokenizer, model
+
+class Trainer(HfSeq2SeqTrainer):
+    def __init__(self, verbalizer=None, eval_task=None, **kwargs):
+        super().__init__(**kwargs)
+        self.eval_task = eval_task
+        self.compute_metrics = self._compute_metrics
+
+    def compute_loss(self, model, inputs, return_outputs=False):
+
+        labels=copy.deepcopy(inputs['input_ids'])
+        # labels[labels==self.tokenizer.pad_token_id]=-100
+        outputs = model(**inputs)
+        logits = outputs.logits
+        shift_logits = logits[..., :-1, :].contiguous()
+        shift_labels = labels[..., 1:].contiguous()
+        loss_fct = CrossEntropyLoss(ignore_index=self.tokenizer.pad_token_id)
+        loss = loss_fct(shift_logits.view(-1, shift_logits.shape[-1]), shift_labels.long().view(-1))
+
+        return (loss, outputs) if return_outputs else loss
+
+    def prediction_step(
+        self,
+        model, #nn.Module,
+        inputs, #Dict[str, Union[torch.Tensor, Any]],
+        prediction_loss_only, #: bool,
+        ignore_keys, #: Optional[List[str]] = None,
+    ): #-> Tuple[Optional[float], Optional[torch.Tensor], Optional[torch.Tensor]]:
+        """
+        Perform an evaluation step on :obj:`model` using obj:`inputs`.
+
+        Subclass and override to inject custom behavior.
+
+        Args:
+            model (:obj:`nn.Module`):
+                The model to evaluate.
+            inputs (:obj:`Dict[str, Union[torch.Tensor, Any]]`):
+                The inputs and targets of the model.
+
+                The dictionary will be unpacked before being fed to the model. Most models expect the targets under the
+                argument :obj:`labels`. Check your model's documentation for all accepted arguments.
+            prediction_loss_only (:obj:`bool`):
+                Whether or not to return the loss only.
+
+        Return:
+            Tuple[Optional[float], Optional[torch.Tensor], Optional[torch.Tensor]]: A tuple with the loss, logits and
+            labels (each being optional).
+        """
+        if not self.args.predict_with_generate or prediction_loss_only:
+            return super().prediction_step(
+                model, inputs, prediction_loss_only=prediction_loss_only, ignore_keys=ignore_keys
+            )
+
+        inputs = self._prepare_inputs(inputs)
+        with torch.no_grad():
+            labels=copy.deepcopy(inputs['input_ids'])
+            # labels[labels==self.tokenizer.pad_token_id]=-100
+            outputs = model(**inputs)
+            logits = outputs.logits
+            shift_logits = logits[..., :-1, :].contiguous()
+            shift_labels = labels[..., 1:].contiguous().long()
+            loss_fct = CrossEntropyLoss(ignore_index=self.tokenizer.pad_token_id)
+            loss = loss_fct(shift_logits.view(-1, shift_logits.shape[-1]), shift_labels.view(-1)).detach().cpu()
+            loss = torch.where(torch.isnan(loss), torch.full_like(loss, 0), loss)
+            
+        if prediction_loss_only:
+            return (loss, None, None)
+        else:
+            # non pad label
+            shift_labels = shift_labels.view(-1).detach().cpu()
+            nonpad_idx = shift_labels!=self.tokenizer.pad_token_id
+            shift_labels = shift_labels[nonpad_idx]
+            # the probability at the corresponding position
+            shift_logits = shift_logits.view(-1, shift_logits.shape[-1])[nonpad_idx].detach().cpu()
+            target_position = torch.nn.functional.one_hot(shift_labels,shift_logits.shape[-1]).bool().to(shift_labels.device)
+            shift_logits = shift_logits.softmax(dim=-1)[target_position]
+
+
+            return (loss, shift_logits, shift_labels)
+
+    def _compute_metrics(self, eval_preds):
+
+        preds, labels = eval_preds
+
+        result = {}
+        for metric in self.eval_task.metric:
+            result.update(metric(preds, labels,ignore_index=self.tokenizer.pad_token_id))
+
+        average_metric = sum(result.values())/len(result)
+        result.update({"average_metrics":average_metric})
+        return result
--- a/examples/examples_prompt/backbones/t5.py
+++ b/examples/examples_prompt/backbones/t5.py
@ -26,14 +26,13 @@ def preprocess_function(raw_example, **kwargs):
    example = InputExample(**raw_example)


-    try:
-        example = verbalizer.wrap_one_example(example)
-        example, other = template.wrap_one_example(example)
-        input_sentence = tokenizer_wrapper.merge_wrapped_example(example)
-        model_inputs = tokenizer(input_sentence, max_length=256,
-                            padding="max_length", truncation=True)
-    except:
-        from IPython import embed; embed(header="Therer")
+ 
+    example = verbalizer.wrap_one_example(example)
+    example, other = template.wrap_one_example(example)
+    input_sentence = tokenizer_wrapper.merge_wrapped_example(example)
+    model_inputs = tokenizer(input_sentence, max_length=256,
+                        padding="max_length", truncation=True)
+

    with tokenizer.as_target_tokenizer():
        label = tokenizer(other['tgt_text']).input_ids
--- a/examples/examples_prompt/backbones/vit.py
+++ b/examples/examples_prompt/backbones/vit.py
--- a/examples/examples_prompt/collect_result.jsonl
+++ b/examples/examples_prompt/collect_result.jsonl
@ -1,59 +0,0 @@
-# the final results will be populated here.{
-    "evaluate": {
-        "epoch": 20.0,
-        "eval_accuracy": 89.2156862745098,
-        "eval_average_metrics": 90.76168929110105,
-        "eval_f1": 92.3076923076923,
-        "eval_loss": 0.16493959724903107,
-        "eval_runtime": 1.6391,
-        "eval_samples_per_second": 124.455
-    },
-    "repo_name": "DeltaHub/bitfit_t5-base_mrpc",
-    "test": {
-        "epoch": 20.0,
-        "test_accuracy": 88.23529411764706,
-        "test_average_metrics": 89.97971602434077,
-        "test_f1": 91.72413793103448,
-        "test_loss": 0.14968213438987732,
-        "test_runtime": 1.6344,
-        "test_samples_per_second": 124.82
-    }
-}
-{
-    "evaluate": {
-        "epoch": 20.0,
-        "eval_average_metrics": 52.10265668831534,
-        "eval_loss": 0.3603779077529907,
-        "eval_matthews_correlation": 52.10265668831534,
-        "eval_runtime": 1.0808,
-        "eval_samples_per_second": 482.046
-    },
-    "repo_name": "DeltaHub/bitfit_t5-base_cola",
-    "test": {
-        "epoch": 20.0,
-        "test_average_metrics": 54.209563471221934,
-        "test_loss": 0.2853100299835205,
-        "test_matthews_correlation": 54.209563471221934,
-        "test_runtime": 1.056,
-        "test_samples_per_second": 494.304
-    }
-}
-{
-    "evaluate": {
-        "epoch": 20.0,
-        "eval_average_metrics": 53.80613287067274,
-        "eval_loss": 0.25723716616630554,
-        "eval_matthews_correlation": 53.80613287067274,
-        "eval_runtime": 1.0583,
-        "eval_samples_per_second": 492.299
-    },
-    "repo_name": "DeltaHub/bitfit_t5-base_cola",
-    "test": {
-        "epoch": 20.0,
-        "test_average_metrics": 54.32497579543861,
-        "test_loss": 0.22327613830566406,
-        "test_matthews_correlation": 54.32497579543861,
-        "test_runtime": 1.0556,
-        "test_samples_per_second": 494.507
-    }
-}
--- a/examples/examples_prompt/configs/adapter_clip-vit-base-patch32/beans.json
+++ b/examples/examples_prompt/configs/adapter_clip-vit-base-patch32/beans.json
@ -0,0 +1,48 @@
+{
+    "bottleneck_dim": 24,
+    "dataset_config_name": [
+        "en"
+    ],
+    "delta_type": "adapter",
+    "do_eval": true,
+    "do_test": true,
+    "do_train": true,
+    "eval_dataset_config_name": [
+        "en"
+    ],
+    "eval_dataset_name": "beans",
+    "eval_steps": 200,
+    "evaluation_strategy": "steps",
+    "greater_is_better": true,
+    "learning_rate": 0.0003,
+    "load_best_model_at_end": true,
+    "max_source_length": 256,
+    "metric_for_best_model": "average_metrics",
+    "model_name_or_path": "/mnt/sfs_turbo/hsd/plm_cache/clip-vit-base-patch32",
+    "num_classes": 3,
+    "num_train_epochs": 20,
+    "output_dir": "outputs/adapter/clip-vit-base-patch32/beans",
+    "overwrite_output_dir": true,
+    "per_device_eval_batch_size": 32,
+    "per_device_train_batch_size": 32,
+    "predict_with_generate": true,
+    "push_to_delta_center": true,
+    "push_to_hub": false,
+    "save_steps": 200,
+    "save_strategy": "steps",
+    "save_total_limit": 1,
+    "seed": 42,
+    "split_validation_test": true,
+    "task_name": "beans",
+    "test_dataset_config_name": [
+        "en"
+    ],
+    "test_dataset_name": "beans",
+    "tokenizer_name": "/mnt/sfs_turbo/hsd/plm_cache/clip-vit-base-patch32",
+    "unfrozen_modules": [
+        "deltas",
+        "layer_norm",
+        "final_layer_norm"
+    ],
+    "warmup_steps": 0
+}
--- a/examples/examples_prompt/configs/adapter_opt-350m/wikitext.json
+++ b/examples/examples_prompt/configs/adapter_opt-350m/wikitext.json
@ -0,0 +1,53 @@
+{
+    "backbone_model": "opt",
+    "bottleneck_dim": 24,
+    "dataset_config_name": [
+        "en"
+    ],
+    "datasets_load_from_disk": true,
+    "datasets_saved_path": "/mnt/sfs_turbo/hsd/huggingface_datasets/saved_to_disk/",
+    "delta_type": "adapter",
+    "do_eval": true,
+    "do_test": true,
+    "do_train": true,
+    "eval_dataset_config_name": [
+        "en"
+    ],
+    "eval_dataset_name": "wikitext",
+    "eval_steps": 200,
+    "evaluation_strategy": "steps",
+    "gradient_accumulation_steps":2,
+    "greater_is_better": false,
+    "learning_rate": 0.0003,
+    "load_best_model_at_end": true,
+    "max_source_length": 900,
+    "metric_for_best_model": "average_metrics",
+    "model_name_or_path": "/mnt/sfs_turbo/hsd/plm_cache/opt-350m",
+    "model_path_public": "opt-350m",
+    "num_train_epochs": 3,
+    "output_dir": "outputs/adapter/opt-350m/wikitext",
+    "overwrite_output_dir": true,
+    "per_device_eval_batch_size": 6,
+    "per_device_train_batch_size": 6,
+    "predict_with_generate": true,
+    "push_to_dc": true,
+    "push_to_hf": false,
+    "save_steps": 200,
+    "save_strategy": "steps",
+    "save_total_limit": 1,
+    "seed": 42,
+    "split_validation_test": true,
+    "task_name": "wikitext",
+    "test_dataset_config_name": [
+        "en"
+    ],
+    "test_dataset_name": "wikitext",
+    "tokenizer_name": "/mnt/sfs_turbo/hsd/plm_cache/opt-350m",
+    "unfrozen_modules": [
+        "deltas",
+        "layer_norm",
+        "final_layer_norm"
+    ],
+    "warmup_steps": 0,
+    "modified_modules":["self_attn"]
+}
--- a/examples/examples_prompt/configs/adapter_vit-large-patch16-224-in21k/beans.json
+++ b/examples/examples_prompt/configs/adapter_vit-large-patch16-224-in21k/beans.json
@ -0,0 +1,53 @@
+{
+    "backbone_model": "vit",
+    "bottleneck_dim": 24,
+    "dataset_config_name": [
+        "en"
+    ],
+    "datasets_load_from_disk": false,
+    "datasets_saved_path": "/mnt/sfs_turbo/hsd/huggingface_datasets/saved_to_disk/",
+    "delta_type": "adapter",
+    "do_eval": true,
+    "do_test": true,
+    "do_train": true,
+    "eval_dataset_config_name": [
+        "en"
+    ],
+    "eval_dataset_name": "beans",
+    "eval_steps": 200,
+    "evaluation_strategy": "steps",
+    "greater_is_better": true,
+    "learning_rate": 0.0003,
+    "load_best_model_at_end": true,
+    "max_source_length": 256,
+    "metric_for_best_model": "average_metrics",
+    "model_name_or_path": "/mnt/sfs_turbo/hsd/plm_cache/vit-large-patch16-224-in21k",
+    "model_path_public": "vit-large-patch16-224-in21k",
+    "num_classes": 3,
+    "num_train_epochs": 20,
+    "output_dir": "outputs/adapter/vit-large-patch16-224-in21k/beans",
+    "overwrite_output_dir": true,
+    "per_device_eval_batch_size": 32,
+    "per_device_train_batch_size": 32,
+    "predict_with_generate": false,
+    "push_to_dc": true,
+    "push_to_hf": false,
+    "save_steps": 200,
+    "save_strategy": "steps",
+    "save_total_limit": 1,
+    "seed": 42,
+    "split_validation_test": true,
+    "task_name": "beans",
+    "test_dataset_config_name": [
+        "en"
+    ],
+    "test_dataset_name": "beans",
+    "tokenizer_name": "/mnt/sfs_turbo/hsd/plm_cache/vit-large-patch16-224-in21k",
+    "unfrozen_modules": [
+        "deltas",
+        "layer_norm",
+        "final_layer_norm"
+    ],
+    "warmup_steps": 0,
+    "modified_modules":["output"]
+}
--- a/examples/examples_prompt/configs/bitfit_t5-large/rte.json
+++ b/examples/examples_prompt/configs/bitfit_t5-large/rte.json
@ -0,0 +1,51 @@
+{
+    "backbone_model": "t5-large",
+    "dataset_config_name": [
+        "en"
+    ],
+    "datasets_load_from_disk": true,
+    "datasets_saved_path": "/mnt/sfs_turbo/hsd/huggingface_datasets/saved_to_disk/",
+    "delta_type": "bitfit",
+    "do_eval": true,
+    "do_test": true,
+    "do_train": true,
+    "eval_dataset_config_name": [
+        "en"
+    ],
+    "eval_dataset_name": "rte",
+    "eval_steps": 100,
+    "evaluation_strategy": "steps",
+    "greater_is_better": true,
+    "learning_rate": 0.0003,
+    "load_best_model_at_end": true,
+    "max_source_length": 256,
+    "metric_for_best_model": "average_metrics",
+    "model_name_or_path": "/mnt/sfs_turbo/hsd/plm_cache/t5-large",
+    "model_path_public": "t5-large",
+    "num_train_epochs": 20,
+    "output_dir": "outputs/bitfit/t5-large/rte",
+    "overwrite_output_dir": true,
+    "per_device_eval_batch_size": 16,
+    "per_device_train_batch_size": 16,
+    "predict_with_generate": true,
+    "push_to_dc": true,
+    "push_to_hf": false,
+    "save_steps": 100,
+    "save_strategy": "steps",
+    "save_total_limit": 1,
+    "seed": 42,
+    "split_validation_test": true,
+    "task_name": "rte",
+    "test_dataset_config_name": [
+        "en"
+    ],
+    "test_dataset_name": "rte",
+    "tokenizer_name": "/mnt/sfs_turbo/hsd/plm_cache/t5-large",
+    "unfrozen_modules": [
+        "deltas",
+        "layer_norm",
+        "final_layer_norm"
+    ],
+    "warmup_steps": 0,
+    "modified_modules":["attn", "ff", "layer_norm"]
+}
--- a/examples/examples_prompt/configs/compacter_blenderbot-3b/sst2.json
+++ b/examples/examples_prompt/configs/compacter_blenderbot-3b/sst2.json
@ -0,0 +1,66 @@
+{
+    "backbone_model": "blenderbot",
+    "dataset_config_name": [
+        "en"
+    ],
+    "datasets_load_from_disk": true,
+    "datasets_saved_path": "/mnt/sfs_turbo/hsd/huggingface_datasets/saved_to_disk/",
+    "delta_type": "compacter",
+    "do_eval": true,
+    "do_test": true,
+    "do_train": true,
+    "eval_dataset_config_name": [
+        "en"
+    ],
+    "eval_dataset_name": "sst2",
+    "eval_steps": 200,
+    "evaluation_strategy": "steps",
+    "factorized_phm": true,
+    "factorized_phm_rule": false,
+    "gradient_clip": false,
+    "greater_is_better": true,
+    "hypercomplex_adapters": true,
+    "hypercomplex_division": 4,
+    "hypercomplex_nonlinearity": "glorot-uniform",
+    "learn_phm": true,
+    "learning_rate": 0.003,
+    "load_best_model_at_end": true,
+    "max_source_length": 128,
+    "metric_for_best_model": "average_metrics",
+    "model_name_or_path": "/mnt/sfs_turbo/hsd/plm_cache/blenderbot-3b",
+    "model_path_public": "blenderbot-3b",
+    "non_linearity": "gelu_new",
+    "normalize_phm_weight": false,
+    "num_train_epochs": 3,
+    "output_dir": "outputs/compacter/blenderbot-3b/sst2",
+    "overwrite_output_dir": true,
+    "per_device_eval_batch_size": 32,
+    "per_device_train_batch_size": 32,
+    "phm_c_init": "normal",
+    "phm_clamp": false,
+    "phm_init_range": 0.0001,
+    "predict_with_generate": true,
+    "push_to_dc": true,
+    "push_to_hf": false,
+    "save_steps": 200,
+    "save_strategy": "steps",
+    "save_total_limit": 1,
+    "seed": 42,
+    "shared_phm_rule": false,
+    "split_validation_test": true,
+    "task_name": "sst2",
+    "test_dataset_config_name": [
+        "en"
+    ],
+    "test_dataset_name": "sst2",
+    "tokenizer_name": "/mnt/sfs_turbo/hsd/plm_cache/blenderbot-3b",
+    "unfrozen_modules": [
+        "deltas",
+        "layer_norm",
+        "final_layer_norm"
+    ],
+    "use_bias_down_sampler": true,
+    "use_bias_up_sampler": true,
+    "warmup_steps": 0,
+    "modified_modules":["fc2"]
+}
--- a/examples/examples_prompt/configs/compacter_deberta-v2-xlarge/mnli.json
+++ b/examples/examples_prompt/configs/compacter_deberta-v2-xlarge/mnli.json
@ -0,0 +1,51 @@
+{
+    "backbone_model": "deberta-v2-xlarge",
+    "dataset_config_name": [
+        "en"
+    ],
+    "datasets_load_from_disk": true,
+    "datasets_saved_path": "/mnt/sfs_turbo/hsd/huggingface_datasets/saved_to_disk/",
+    "delta_type": "compacter",
+    "do_eval": true,
+    "do_test": true,
+    "do_train": true,
+    "eval_dataset_config_name": [
+        "en"
+    ],
+    "eval_dataset_name": "mnli",
+    "eval_steps": 500,
+    "evaluation_strategy": "steps",
+    "greater_is_better": true,
+    "is_seq2seq": false,
+    "learning_rate": 0.0003,
+    "load_best_model_at_end": true,
+    "max_source_length": 256,
+    "metric_for_best_model": "average_metrics",
+    "model_name_or_path": "/mnt/sfs_turbo/hsd/plm_cache/deberta-v2-xlarge",
+    "num_train_epochs": 3,
+    "output_dir": "outputs/compacter/deberta-v2-xlarge/mnli",
+    "overwrite_output_dir": true,
+    "per_device_eval_batch_size": 32,
+    "per_device_train_batch_size": 32,
+    "predict_with_generate": false,
+    "push_to_dc": true,
+    "push_to_hub": false,
+    "save_steps": 500,
+    "save_strategy": "steps",
+    "save_total_limit": 1,
+    "seed": 42,
+    "split_validation_test": true,
+    "task_name": "mnli",
+    "test_dataset_config_name": [
+        "en"
+    ],
+    "test_dataset_name": "mnli",
+    "tokenizer_name": "/mnt/sfs_turbo/hsd/plm_cache/deberta-v2-xlarge",
+    "unfrozen_modules": [
+        "deltas",
+        "layer_norm",
+        "final_layer_norm"
+    ],
+    "warmup_steps": 0,
+    "modified_modules":["attention"]
+}
--- a/examples/examples_prompt/configs/compacter_long-t5-tglobal-large/rte.json
+++ b/examples/examples_prompt/configs/compacter_long-t5-tglobal-large/rte.json
@ -0,0 +1,51 @@
+{
+    "backbone_model": "long-t5",
+    "dataset_config_name": [
+        "en"
+    ],
+    "datasets_load_from_disk": true,
+    "datasets_saved_path": "/mnt/sfs_turbo/hsd/huggingface_datasets/saved_to_disk/",
+    "delta_type": "compacter",
+    "do_eval": true,
+    "do_test": true,
+    "do_train": true,
+    "eval_dataset_config_name": [
+        "en"
+    ],
+    "eval_dataset_name": "rte",
+    "eval_steps": 100,
+    "evaluation_strategy": "steps",
+    "greater_is_better": true,
+    "learning_rate": 0.0003,
+    "load_best_model_at_end": true,
+    "max_source_length": 256,
+    "metric_for_best_model": "average_metrics",
+    "model_name_or_path": "/mnt/sfs_turbo/hsd/plm_cache/long-t5-tglobal-large",
+    "model_path_public": "long-t5-tglobal-large",
+    "num_train_epochs": 20,
+    "output_dir": "outputs/compacter/long-t5-tglobal-large/rte",
+    "overwrite_output_dir": true,
+    "per_device_eval_batch_size": 16,
+    "per_device_train_batch_size": 16,
+    "predict_with_generate": true,
+    "push_to_dc": true,
+    "push_to_hf": false,
+    "save_steps": 100,
+    "save_strategy": "steps",
+    "save_total_limit": 1,
+    "seed": 42,
+    "split_validation_test": true,
+    "task_name": "rte",
+    "test_dataset_config_name": [
+        "en"
+    ],
+    "test_dataset_name": "rte",
+    "tokenizer_name": "/mnt/sfs_turbo/hsd/plm_cache/long-t5-tglobal-large",
+    "unfrozen_modules": [
+        "deltas",
+        "layer_norm",
+        "final_layer_norm"
+    ],
+    "warmup_steps": 0,
+    "modified_modules":["attn", "ff", "layer_norm"]
+}
--- a/examples/examples_prompt/configs/gen_clip.py
+++ b/examples/examples_prompt/configs/gen_clip.py
@ -2,7 +2,7 @@ import collections
 import copy

 PATHBASE="/mnt/sfs_turbo/hsd/plm_cache/"
-PATHBASE="/home/hushengding/plm_cache/"
+# PATHBASE="/home/hushengding/plm_cache/"

 AllConfigs = {}

--- a/examples/examples_prompt/configs/gen_t5.py
+++ b/examples/examples_prompt/configs/gen_t5.py
@ -45,11 +45,14 @@ BaseConfigs['t5-base'] = {
                "greater_is_better": True,
                "evaluation_strategy": "steps",
                "overwrite_output_dir": True,
-                "push_to_hub": False,
-                "push_to_delta_center": True,
+                "push_to_hf": False,
+                "push_to_dc": True,
                "save_strategy": "steps",
                "datasets_load_from_disk": True,
-                "datasets_saved_path": "/mnt/sfs_turbo/hsd/huggingface_datasets/saved_to_disk/"
+                "datasets_saved_path": "/mnt/sfs_turbo/hsd/huggingface_datasets/saved_to_disk/",
+                "backbone_model": "t5", # use in delta center,
+                "model_path_public": "t5-base", # use in delta center,
+
            }

 AllConfigs['bitfit_t5-base'] = copy.deepcopy(BaseConfigs['t5-base'])
--- a/examples/examples_prompt/configs/lora_beit-large-patch16-224/cifar10.json
+++ b/examples/examples_prompt/configs/lora_beit-large-patch16-224/cifar10.json
@ -0,0 +1,52 @@
+{
+    "backbone_model": "beit",
+    "dataset_config_name": [
+        "en"
+    ],
+    "datasets_load_from_disk": true,
+    "datasets_saved_path": "/mnt/sfs_turbo/hsd/huggingface_datasets/saved_to_disk",
+    "delta_type": "lora",
+    "do_eval": true,
+    "do_test": true,
+    "do_train": true,
+    "eval_dataset_config_name": [
+        "en"
+    ],
+    "eval_dataset_name": "cifar10",
+    "eval_steps": 200,
+    "evaluation_strategy": "steps",
+    "greater_is_better": true,
+    "learning_rate": 0.0003,
+    "load_best_model_at_end": true,
+    "max_source_length": 256,
+    "metric_for_best_model": "average_metrics",
+    "model_name_or_path": "/mnt/sfs_turbo/hsd/plm_cache/beit-large-patch16-224",
+    "model_path_public": "beit-large-patch16-224",
+    "num_classes": 10,
+    "num_train_epochs": 20,
+    "output_dir": "outputs/lora/beit-large-patch16-224/cifar10",
+    "overwrite_output_dir": true,
+    "per_device_eval_batch_size": 32,
+    "per_device_train_batch_size": 32,
+    "predict_with_generate": false,
+    "push_to_dc": true,
+    "push_to_hf": false,
+    "save_steps": 200,
+    "save_strategy": "steps",
+    "save_total_limit": 1,
+    "seed": 42,
+    "split_validation_test": true,
+    "task_name": "cifar10",
+    "test_dataset_config_name": [
+        "en"
+    ],
+    "test_dataset_name": "cifar10",
+    "tokenizer_name": "/mnt/sfs_turbo/hsd/plm_cache/beit-large-patch16-224",
+    "unfrozen_modules": [
+        "deltas",
+        "layer_norm",
+        "final_layer_norm"
+    ],
+    "warmup_steps": 0,
+    "modified_modules":["query","value"]
+}
--- a/examples/examples_prompt/configs/lora_gpt-j-6B/wikitext.json
+++ b/examples/examples_prompt/configs/lora_gpt-j-6B/wikitext.json
@ -0,0 +1,52 @@
+{
+    "backbone_model": "gpt-j",
+    "dataset_config_name": [
+        "en"
+    ],
+    "datasets_load_from_disk": true,
+    "datasets_saved_path": "/mnt/sfs_turbo/hsd/huggingface_datasets/saved_to_disk/",
+    "delta_type": "lora",
+    "do_eval": true,
+    "do_test": true,
+    "do_train": true,
+    "eval_dataset_config_name": [
+        "en"
+    ],
+    "eval_dataset_name": "wikitext",
+    "eval_steps": 500,
+    "evaluation_strategy": "steps",
+    "gradient_accumulation_steps":4,
+    "greater_is_better": false,
+    "learning_rate": 0.00003,
+    "load_best_model_at_end": true,
+    "max_source_length": 512,
+    "metric_for_best_model": "average_metrics",
+    "model_name_or_path": "/mnt/sfs_turbo/hsd/plm_cache/gpt-j-6B",
+    "model_path_public": "gpt-j-6B",
+    "num_train_epochs": 2,
+    "output_dir": "outputs/lora/gpt-j-6B/wikitext",
+    "overwrite_output_dir": true,
+    "per_device_eval_batch_size": 2,
+    "per_device_train_batch_size": 2,
+    "predict_with_generate": true,
+    "push_to_dc": true,
+    "push_to_hf": false,
+    "save_steps": 500,
+    "save_strategy": "steps",
+    "save_total_limit": 1,
+    "seed": 42,
+    "split_validation_test": true,
+    "task_name": "wikitext",
+    "test_dataset_config_name": [
+        "en"
+    ],
+    "test_dataset_name": "wikitext",
+    "tokenizer_name": "/mnt/sfs_turbo/hsd/plm_cache/gpt-j-6B",
+    "unfrozen_modules": [
+        "deltas",
+        "layer_norm",
+        "final_layer_norm"
+    ],
+    "warmup_steps": 0,
+    "modified_modules":["20.attn.q_proj","21.attn.q_proj","22.attn.q_proj","23.attn.q_proj","24.attn.q_proj","25.attn.q_proj","26.attn.q_proj","27.attn.q_proj"]
+}
--- a/examples/examples_prompt/configs/lora_roberta-large/superglue-boolq.json
+++ b/examples/examples_prompt/configs/lora_roberta-large/superglue-boolq.json
@ -0,0 +1,52 @@
+{
+    "backbone_model": "roberta-large",
+    "dataset_config_name": [
+        "en"
+    ],
+    "datasets_load_from_disk": true,
+    "datasets_saved_path": "/mnt/sfs_turbo/hsd/huggingface_datasets/saved_to_disk/",
+    "delta_type": "lora",
+    "do_eval": true,
+    "do_test": true,
+    "do_train": true,
+    "eval_dataset_config_name": [
+        "en"
+    ],
+    "eval_dataset_name": "superglue-boolq",
+    "eval_steps": 200,
+    "evaluation_strategy": "steps",
+    "greater_is_better": true,
+    "is_seq2seq": false,
+    "learning_rate": 0.0001,
+    "load_best_model_at_end": true,
+    "max_source_length": 256,
+    "metric_for_best_model": "average_metrics",
+    "model_name_or_path": "/mnt/sfs_turbo/hsd/plm_cache/roberta-large",
+    "model_path_public": "roberta-large",
+    "num_train_epochs": 20,
+    "output_dir": "outputs/lora/roberta-large/superglue-boolq",
+    "overwrite_output_dir": true,
+    "per_device_eval_batch_size": 32,
+    "per_device_train_batch_size": 32,
+    "predict_with_generate": false,
+    "push_to_hub": false,
+    "push_to_dc": true,
+    "save_steps": 200,
+    "save_strategy": "steps",
+    "save_total_limit": 1,
+    "seed": 42,
+    "split_validation_test": true,
+    "task_name": "superglue-boolq",
+    "test_dataset_config_name": [
+        "en"
+    ],
+    "test_dataset_name": "superglue-boolq",
+    "tokenizer_name": "/mnt/sfs_turbo/hsd/plm_cache/roberta-large",
+    "unfrozen_modules": [
+        "deltas",
+        "layer_norm",
+        "final_layer_norm"
+    ],
+    "warmup_steps": 0,
+    "modified_modules":["query","value"]
+}
--- a/examples/examples_prompt/configs/lora_xlm-roberta-large/superglue-wic.json
+++ b/examples/examples_prompt/configs/lora_xlm-roberta-large/superglue-wic.json
@ -0,0 +1,52 @@
+{
+    "backbone_model": "xlm-roberta-large",
+    "dataset_config_name": [
+        "en"
+    ],
+    "datasets_load_from_disk": true,
+    "datasets_saved_path": "/mnt/sfs_turbo/hsd/huggingface_datasets/saved_to_disk/",
+    "delta_type": "lora",
+    "do_eval": true,
+    "do_test": true,
+    "do_train": true,
+    "eval_dataset_config_name": [
+        "en"
+    ],
+    "eval_dataset_name": "superglue-wic",
+    "eval_steps": 100,
+    "evaluation_strategy": "steps",
+    "greater_is_better": true,
+    "is_seq2seq": false,
+    "learning_rate": 0.0003,
+    "load_best_model_at_end": true,
+    "max_source_length": 256,
+    "metric_for_best_model": "average_metrics",
+    "model_name_or_path": "/mnt/sfs_turbo/hsd/plm_cache/xlm-roberta-large",
+    "model_path_public": "xlm-roberta-large",
+    "num_train_epochs": 20,
+    "output_dir": "outputs/lora/xlm-roberta-large/superglue-wic",
+    "overwrite_output_dir": true,
+    "per_device_eval_batch_size": 16,
+    "per_device_train_batch_size": 16,
+    "predict_with_generate": false,
+    "push_to_dc": true,
+    "push_to_hub": false,
+    "save_steps": 100,
+    "save_strategy": "steps",
+    "save_total_limit": 1,
+    "seed": 42,
+    "split_validation_test": true,
+    "task_name": "superglue-wic",
+    "test_dataset_config_name": [
+        "en"
+    ],
+    "test_dataset_name": "superglue-wic",
+    "tokenizer_name": "/mnt/sfs_turbo/hsd/plm_cache/xlm-roberta-large",
+    "unfrozen_modules": [
+        "deltas",
+        "layer_norm",
+        "final_layer_norm"
+    ],
+    "warmup_steps": 0,
+    "modified_modules":["query","value"]
+}
--- a/examples/examples_prompt/configs/low_rank_adapter_gpt2/wikitext.json
+++ b/examples/examples_prompt/configs/low_rank_adapter_gpt2/wikitext.json
@ -0,0 +1,52 @@
+{
+    "backbone_model": "gpt2",
+    "dataset_config_name": [
+        "en"
+    ],
+    "datasets_load_from_disk": true,
+    "datasets_saved_path": "/mnt/sfs_turbo/hsd/huggingface_datasets/saved_to_disk/",
+    "delta_type": "low_rank_adapter",
+    "do_eval": true,
+    "do_test": true,
+    "do_train": true,
+    "eval_dataset_config_name": [
+        "en"
+    ],
+    "eval_dataset_name": "wikitext",
+    "eval_steps": 200,
+    "evaluation_strategy": "steps",
+    "gradient_accumulation_steps":1,
+    "greater_is_better": false,
+    "learning_rate": 0.0003,
+    "load_best_model_at_end": true,
+    "max_source_length": 768,
+    "metric_for_best_model": "average_metrics",
+    "model_name_or_path": "/mnt/sfs_turbo/hsd/plm_cache/gpt2",
+    "model_path_public": "gpt2",
+    "num_train_epochs": 2,
+    "output_dir": "outputs/low_rank_adapter/gpt2/wikitext",
+    "overwrite_output_dir": true,
+    "per_device_eval_batch_size": 16,
+    "per_device_train_batch_size": 16,
+    "predict_with_generate": true,
+    "push_to_dc": true,
+    "push_to_hf": false,
+    "save_steps": 200,
+    "save_strategy": "steps",
+    "save_total_limit": 1,
+    "seed": 42,
+    "split_validation_test": true,
+    "task_name": "wikitext",
+    "test_dataset_config_name": [
+        "en"
+    ],
+    "test_dataset_name": "wikitext",
+    "tokenizer_name": "/mnt/sfs_turbo/hsd/plm_cache/gpt2",
+    "unfrozen_modules": [
+        "deltas",
+        "layer_norm",
+        "final_layer_norm"
+    ],
+    "warmup_steps": 0,
+    "modified_modules":["attn","mlp"]
+}
--- a/examples/examples_prompt/configs/prefix_bert-large-cased/rte.json
+++ b/examples/examples_prompt/configs/prefix_bert-large-cased/rte.json
@ -0,0 +1,51 @@
+{
+    "backbone_model": "bert-large-cased",
+    "dataset_config_name": [
+        "en"
+    ],
+    "datasets_load_from_disk": true,
+    "datasets_saved_path": "/mnt/sfs_turbo/hsd/huggingface_datasets/saved_to_disk/",
+    "delta_type": "prefix",
+    "do_eval": true,
+    "do_test": true,
+    "do_train": true,
+    "eval_dataset_config_name": [
+        "en"
+    ],
+    "eval_dataset_name": "rte",
+    "eval_steps": 100,
+    "evaluation_strategy": "steps",
+    "greater_is_better": true,
+    "is_seq2seq": false,
+    "learning_rate": 0.0003,
+    "load_best_model_at_end": true,
+    "max_source_length": 256,
+    "metric_for_best_model": "average_metrics",
+    "model_name_or_path": "/mnt/sfs_turbo/hsd/plm_cache/bert-large-cased",
+    "num_train_epochs": 20,
+    "output_dir": "outputs/prefix/bert-large-cased/rte",
+    "overwrite_output_dir": true,
+    "per_device_eval_batch_size": 16,
+    "per_device_train_batch_size": 16,
+    "predict_with_generate": false,
+    "push_to_dc": true,
+    "push_to_hub": false,
+    "save_steps": 100,
+    "save_strategy": "steps",
+    "save_total_limit": 1,
+    "seed": 42,
+    "split_validation_test": true,
+    "task_name": "rte",
+    "test_dataset_config_name": [
+        "en"
+    ],
+    "test_dataset_name": "rte",
+    "tokenizer_name": "/mnt/sfs_turbo/hsd/plm_cache/bert-large-cased",
+    "unfrozen_modules": [
+        "deltas",
+        "layer_norm",
+        "final_layer_norm"
+    ],
+    "warmup_steps": 0,
+    "modified_modules":["attention"]
+}
--- a/examples/examples_prompt/configs/soft_prompt_bart-large/superglue-boolq.json
+++ b/examples/examples_prompt/configs/soft_prompt_bart-large/superglue-boolq.json
@ -0,0 +1,51 @@
+{
+    "backbone_model": "bart",
+    "dataset_config_name": [
+        "en"
+    ],
+    "datasets_load_from_disk": true,
+    "datasets_saved_path": "/mnt/sfs_turbo/hsd/huggingface_datasets/saved_to_disk/",
+    "delta_type": "soft_prompt",
+    "do_eval": true,
+    "do_test": true,
+    "do_train": true,
+    "eval_dataset_config_name": [
+        "en"
+    ],
+    "eval_dataset_name": "superglue-boolq",
+    "eval_steps": 500,
+    "evaluation_strategy": "steps",
+    "gradient_accumulation_steps":1,
+    "greater_is_better": true,
+    "learning_rate": 0.1,
+    "load_best_model_at_end": true,
+    "max_source_length": 256,
+    "metric_for_best_model": "average_metrics",
+    "model_name_or_path": "/mnt/sfs_turbo/hsd/plm_cache/bart-large",
+    "model_path_public": "bart-large",
+    "num_train_epochs": 50,
+    "output_dir": "outputs/soft_prompt/bart-large/superglue-boolq",
+    "overwrite_output_dir": true,
+    "per_device_eval_batch_size": 32,
+    "per_device_train_batch_size": 32,
+    "predict_with_generate": true,
+    "push_to_dc": true,
+    "push_to_hf": false,
+    "save_steps": 500,
+    "save_strategy": "steps",
+    "save_total_limit": 1,
+    "seed": 42,
+    "soft_token_num":100,
+    "split_validation_test": true,
+    "task_name": "superglue-boolq",
+    "test_dataset_config_name": [
+        "en"
+    ],
+    "test_dataset_name": "superglue-boolq",
+    "tokenizer_name": "/mnt/sfs_turbo/hsd/plm_cache/bart-large",
+    "token_init": true,
+    "unfrozen_modules": [
+        "deltas"
+    ],
+    "warmup_steps": 0
+}
--- a/examples/examples_prompt/data_processors/processor.py
+++ b/examples/examples_prompt/data_processors/processor.py
@ -93,4 +93,10 @@ class AbstractTask(abc.ABC):
            # shuffles the data and samples it.
            if n_obs is not None:
                dataset = self.subsample(dataset, n_obs)
-        return dataset.map(self.preprocessor)
+
+        this_method = getattr(self.__class__, 'preprocessor')
+        base_method = getattr(AbstractTask, 'preprocessor')
+        if this_method is not base_method:
+            return dataset.map(self.preprocessor)
+        else:
+            return dataset
--- a/examples/examples_prompt/data_processors/tasks.py
+++ b/examples/examples_prompt/data_processors/tasks.py
@ -12,22 +12,16 @@ import logging
 import numpy as np
 import torch
 import re
-from openprompt.prompts import ManualTemplate, ManualVerbalizer
-from openprompt.plms.utils import TokenizerWrapper
-from openprompt.data_utils import InputExample
-from openprompt.prompts import GenerationVerbalizer
 import itertools
-
+import os

 logger = logging.getLogger(__name__)


-
 from transformers.models.auto.tokenization_auto import tokenizer_class_from_name

 from typing import List, Dict
 from collections import defaultdict
-from openprompt.utils import round_list
 import warnings


@ -68,7 +62,8 @@ class COLA(AbstractTask):
    }

    def load_dataset(self, split):
-        if self.data_args.datasets_load_from_disk:
+        offline = os.environ.get("HF_DATASETS_OFFLINE", "0")
+        if offline == '1':
            return datasets.load_from_disk(f"{self.data_args.datasets_saved_path}/glue.cola")[split]
        else:
            return datasets.load_dataset('glue', 'cola',
@ -96,7 +91,8 @@ class SST2(AbstractTask):
    }

    def load_dataset(self, split):
-        if self.data_args.datasets_load_from_disk:
+        offline = os.environ.get("HF_DATASETS_OFFLINE", "0")
+        if offline == '1':
            return datasets.load_from_disk(f"{self.data_args.datasets_saved_path}/glue.sst2")[split]
        else:
            return datasets.load_dataset('glue', 'sst2',
@ -123,10 +119,9 @@ class MRPC(AbstractTask):
    }


-
-
    def load_dataset(self, split):
-        if self.data_args.datasets_load_from_disk:
+        offline = os.environ.get("HF_DATASETS_OFFLINE", "0")
+        if offline == '1':
            return datasets.load_from_disk(f"{self.data_args.datasets_saved_path}/glue.mrpc")[split]
        else:
            return datasets.load_dataset('glue', 'mrpc', split=split, script_version="master")
@ -152,7 +147,8 @@ class QQP(AbstractTask):


    def load_dataset(self, split):
-        if self.data_args.datasets_load_from_disk:
+        offline = os.environ.get("HF_DATASETS_OFFLINE", "0")
+        if offline == '1':
            return datasets.load_from_disk(f"{self.data_args.datasets_saved_path}/glue.qqp")[split]
        else:
            return datasets.load_dataset('glue', 'qqp',
@ -208,7 +204,8 @@ class MNLI(AbstractTask):
    }

    def load_dataset(self, split):
-        if self.data_args.datasets_load_from_disk:
+        offline = os.environ.get("HF_DATASETS_OFFLINE", "0")
+        if offline == '1':
            return datasets.load_from_disk(f"{self.data_args.datasets_saved_path}/glue.mnli")[split]
        else:
            return datasets.load_dataset('glue', 'mnli', split=split, script_version="master")
@ -243,7 +240,8 @@ class QNLI(AbstractTask):


    def load_dataset(self, split):
-        if self.data_args.datasets_load_from_disk:
+        offline = os.environ.get("HF_DATASETS_OFFLINE", "0")
+        if offline == '1':
            return datasets.load_from_disk(f"{self.data_args.datasets_saved_path}/glue.qnli")[split]
        else:
            return datasets.load_dataset('glue', 'qnli', split=split, script_version="master")
@ -279,7 +277,8 @@ class RTE(AbstractTask):
    }

    def load_dataset(self, split):
-        if self.data_args.datasets_load_from_disk:
+        offline = os.environ.get("HF_DATASETS_OFFLINE", "0")
+        if offline == '1':
            return datasets.load_from_disk(f"{self.data_args.datasets_saved_path}/glue.rte")[split]
        else:
            return datasets.load_dataset('glue', 'rte',
@ -306,7 +305,8 @@ class WNLI(AbstractTask):


    def load_dataset(self, split):
-        if self.data_args.datasets_load_from_disk:
+        offline = os.environ.get("HF_DATASETS_OFFLINE", "0")
+        if offline == '1':
            return datasets.load_from_disk(f"{self.data_args.datasets_saved_path}/glue.wnli")[split]
        else:
            return datasets.load_dataset('glue', 'wnli', split=split, script_version="master")
@ -334,7 +334,8 @@ class SuperGLUEBoolQ(AbstractTask):
    }

    def load_dataset(self, split):
-        if self.data_args.datasets_load_from_disk:
+        offline = os.environ.get("HF_DATASETS_OFFLINE", "0")
+        if offline == '1':
            return datasets.load_from_disk(f"{self.data_args.datasets_saved_path}/super_glue.boolq")[split]
        else:
            return datasets.load_dataset('super_glue', 'boolq', split=split, script_version="master")
@ -347,8 +348,8 @@ class SuperGLUECB(AbstractTask):
    split_to_data_split = {"train": "train",
                           "validation": "validation",
                           "test": "validation"}
-    metric = [metrics.mean_multiclass_f1(num_classes=3), metrics.accuracy]
-    metric_names = ["f1_multiclass", "accuracy"]
+    metric = [metrics.accuracy]
+    metric_names = ["accuracy"]

    verbalizers = {
        "0":{"0": "yes",
@ -361,7 +362,8 @@ class SuperGLUECB(AbstractTask):
    }

    def load_dataset(self, split):
-        if self.data_args.datasets_load_from_disk:
+        offline = os.environ.get("HF_DATASETS_OFFLINE", "0")
+        if offline == '1':
            return datasets.load_from_disk(f"{self.data_args.datasets_saved_path}/super_glue.cb")[split]
        else:
            return datasets.load_dataset('super_glue', 'cb', split=split, script_version="master")
@ -387,7 +389,8 @@ class SuperGLUECOPA(AbstractTask):
    }

    def load_dataset(self, split):
-        if self.data_args.datasets_load_from_disk:
+        offline = os.environ.get("HF_DATASETS_OFFLINE", "0")
+        if offline == '1':
            return datasets.load_from_disk(f"{self.data_args.datasets_saved_path}/super_glue.copa")[split]
        else:
            return datasets.load_dataset('super_glue', 'copa', split=split, script_version="master")
@ -416,7 +419,8 @@ class SuperGLUEMultiRC(AbstractTask):


    def load_dataset(self, split):
-        if self.data_args.datasets_load_from_disk:
+        offline = os.environ.get("HF_DATASETS_OFFLINE", "0")
+        if offline == '1':
            return datasets.load_from_disk(f"{self.data_args.datasets_saved_path}/super_glue.multirc")[split]
        else:
            return datasets.load_dataset('super_glue', 'multirc', split=split, script_version="master")
@ -459,7 +463,8 @@ class SuperGLUEWIC(AbstractTask):
    }

    def load_dataset(self, split):
-        if self.data_args.datasets_load_from_disk:
+        offline = os.environ.get("HF_DATASETS_OFFLINE", "0")
+        if offline == '1':
            return datasets.load_from_disk(f"{self.data_args.datasets_saved_path}/super_glue.wic")[split]
        else:
            return datasets.load_dataset('super_glue', 'wic', split=split, script_version="master")
@ -549,13 +554,76 @@ class Beans(AbstractTask):

    def load_dataset(self, split):
        # from IPython import embed; embed(header="beans")
-        if self.data_args.datasets_load_from_disk:
-            return datasets.load_from_disk(f"{self.data_args.datasets_saved_path}/super_glue.wic")[split]
+        offline = os.environ.get("HF_DATASETS_OFFLINE", "0")
+        if offline == '1':
+            return datasets.load_from_disk(f"{self.data_args.datasets_saved_path}/beans")[split]
        else:
            return datasets.load_dataset('beans', split=split, script_version="master")

+class Wikitext(AbstractTask):
+    """WikiText task scored with ``metrics.perplexity``.
+
+    Both ``validation`` and ``test`` point at the dataset's ``validation``
+    split, and ``split_valid_to_make_test`` is set.
+    """
+    name = "wikitext"
+    split_to_data_split = {
+        "train": "train",
+        "validation": "validation",
+        "test": "validation",
+    }
+    metric = [metrics.perplexity]
+    metric_names = ["perplexity"]

+    # Template "0" has no label words attached.
+    verbalizers = {"0": {}}

+    templates_text = {"0": """{"meta":"text"}"""}
+    split_valid_to_make_test = True
+
+    def load_dataset(self, split):
+        """Return ``split`` from the saved copy on disk, or from the hub (``wikitext-2-v1``)."""
+        # NOTE(review): most other tasks in this file switch on the
+        # HF_DATASETS_OFFLINE environment variable; this one still reads the
+        # ``datasets_load_from_disk`` flag -- confirm that is intended.
+        if not self.data_args.datasets_load_from_disk:
+            return datasets.load_dataset('wikitext','wikitext-2-v1', split=split, script_version="master")
+        saved = datasets.load_from_disk(f"{self.data_args.datasets_saved_path}/wikitext")
+        return saved[split]
+
+class Cifar10(AbstractTask):
+    """CIFAR-10 task scored with ``metrics.accuracy``.
+
+    Both ``validation`` and ``test`` point at the dataset's ``test`` split.
+    This class defines no ``preprocessor`` override.
+    """
+    name = "cifar10"
+
+    split_to_data_split = {"train": "train",
+                           "validation": "test",
+                           "test": "test"}
+    metric = [metrics.accuracy]
+    metric_names = ["accuracy"]
+
+    def load_dataset(self, split):
+        """Return ``split`` of CIFAR-10 from the saved copy on disk or from the hub.
+
+        Args:
+            split: Name of the dataset split to load.
+
+        Returns:
+            The requested split, in full, from whichever source is selected
+            by ``data_args.datasets_load_from_disk``.
+        """
+        if self.data_args.datasets_load_from_disk:
+            # Return the whole split so the on-disk and hub paths yield the
+            # same data; no row sub-selection and no printing happens here.
+            return datasets.load_from_disk(f"{self.data_args.datasets_saved_path}/cifar10")[split]
+        return datasets.load_dataset('cifar10', split=split, script_version="master")
+class Fashion_MNIST(AbstractTask):
+    """Fashion-MNIST task scored with ``metrics.accuracy``.
+
+    Both ``validation`` and ``test`` point at the dataset's ``test`` split.
+    """
+    # NOTE(review): this spelling differs from the 'fashion_mnist' key used in
+    # TASK_MAPPING -- confirm which one consumers of ``name`` expect.
+    name = "Fashion-MNIST"
+
+    split_to_data_split = {"train": "train",
+                           "validation": "test",
+                           "test": "test"}
+    metric = [metrics.accuracy]
+    metric_names = ["accuracy"]
+
+    def load_dataset(self, split):
+        """Return ``split`` of Fashion-MNIST from the saved copy on disk or from the hub.
+
+        Args:
+            split: Name of the dataset split to load.
+
+        Returns:
+            The requested split from whichever source is selected by
+            ``data_args.datasets_load_from_disk``.
+        """
+        if self.data_args.datasets_load_from_disk:
+            # Loading is silent: nothing is written to stdout on this path.
+            return datasets.load_from_disk(f"{self.data_args.datasets_saved_path}/fashion_mnist")[split]
+        return datasets.load_dataset('fashion_mnist', split=split, script_version="master")

 TASK_MAPPING = OrderedDict(
    [
@ -575,7 +643,10 @@ TASK_MAPPING = OrderedDict(
        ('superglue-multirc', SuperGLUEMultiRC),
        ('superglue-wic', SuperGLUEWIC),
        # ('superglue-record', SuperGLUERecord)
-        ('beans', Beans)
+        ('beans', Beans),
+        ('wikitext',Wikitext),
+        ('cifar10',Cifar10),
+        ('fashion_mnist',Fashion_MNIST)
    ]
 )

--- a/examples/examples_prompt/metrics/metrics.py
+++ b/examples/examples_prompt/metrics/metrics.py
@ -11,6 +11,14 @@ import sklearn.metrics

 logger = getLogger(__name__)

+def perplexity(outputs, targets, ignore_index=-100):
+    """Return ``exp`` of the mean negative natural log of ``outputs``.
+
+    Args:
+        outputs: Values fed to ``np.log``; presumably the probabilities the
+            model assigned to the reference tokens -- TODO confirm with caller.
+        targets: Accepted for signature compatibility; not used here.
+        ignore_index: Accepted for signature compatibility; not used here.
+
+    Returns:
+        dict: ``{"perplexity": value}`` with ``value`` a Python ``float``.
+    """
+    mean_neg_log = -np.mean(np.log(outputs))
+    return {"perplexity": float(np.exp(mean_neg_log))}
+
 def accuracy(predictions, targets) -> dict:
    """Computes the average accuracy."""
    return {"accuracy": 100 * ((np.array(predictions) == np.array(targets)).mean())}
@ -47,20 +55,20 @@ def spearman_corrcoef(predictions, targets) -> dict:



-def spearman_corrcoef(predictions, targets) -> dict:
-    """Computes Spearman correlation coefficient."""
-    # TODO: we need to do postprocessors in a clean way for each dataset.
-    from examples_seq2seq.data_processors.postprocessors import string_to_float
-    targets = [string_to_float(target) for target in targets]
-    predictions= [string_to_float(prediction) for prediction in predictions]
-    spearman_corrcoef = 100 * scipy.stats.spearmanr(targets, predictions)[0]
+# def spearman_corrcoef(predictions, targets) -> dict:
+#     """Computes Spearman correlation coefficient."""
+#     # TODO: we need to do postprocessors in a clean way for each dataset.
+#     from examples_seq2seq.data_processors.postprocessors import string_to_float
+#     targets = [string_to_float(target) for target in targets]
+#     predictions= [string_to_float(prediction) for prediction in predictions]
+#     spearman_corrcoef = 100 * scipy.stats.spearmanr(targets, predictions)[0]

-    # Note that if all the predictions will be the same, spearman
-    # correlation is nan, to gaurad against this, we check the output
-    # and return 0 in this case.
-    if math.isnan(spearman_corrcoef):
-        spearman_corrcoef = 0
-    return {"spearmanr": spearman_corrcoef}
+#     # Note that if all the predictions will be the same, spearman
+#     # correlation is nan, to guard against this, we check the output
+#     # and return 0 in this case.
+#     if math.isnan(spearman_corrcoef):
+#         spearman_corrcoef = 0
+#     return {"spearmanr": spearman_corrcoef}


 def f1_score_with_invalid(predictions, targets) -> dict:
@ -102,8 +110,8 @@ def f1_score(predictions, targets) -> dict:
    Returns:
      F1 score, where any prediction != 0 or 1 is counted as wrong.
    """
-    targets = targets.astype(np.int32)
-    predictions = predictions.astype(np.int32)
+    targets = np.array(targets).astype(np.int32)
+    predictions = np.array(predictions).astype(np.int32)
    return {"f1": 100 * sklearn.metrics.f1_score(targets, predictions)}

 # TODO: maybe gaurd against invalid values https://stackoverflow.com/questions/56865344/how-do-i-calculate-the-matthews-correlation-coefficient-in-tensorflow
--- a/examples/examples_prompt/src/run.py
+++ b/examples/examples_prompt/src/run.py
@ -26,10 +26,12 @@ You can also adapt this script on your own tasks.

 import os
 import sys
+
 os.environ['MKL_THREADING_LAYER'] = 'GNU'
 os.environ['MKL_SERVICE_FORCE_INTEL'] = '1'
 os.environ["TOKENIZERS_PARALLELISM"] = "false"
 sys.path.append(os.path.join(os.getcwd(), "../"))
+# sys.path.append(os.path.join(os.getcwd(), "/mnt/sfs_turbo/zhangzhen/OpenDelta"))
 sys.path.append(os.path.join(os.getcwd()))

 import functools
@ -56,7 +58,7 @@ from transformers.trainer_utils import is_main_process, get_last_checkpoint

 from data_processors import AutoTask #, #TaskDataCollatorForSeq2Seq, AutoPostProcessor, data_collator
 from utils import read_json, save_json
-from utils.args import ModelArguments, TrainingArguments, DataTrainingArguments, RemainArgHfArgumentParser
+from utils.args import ModelArguments, TrainingArguments, DataTrainingArguments, DeltaArguments, RemainArgHfArgumentParser


 logger = logging.getLogger(__name__)
@ -66,16 +68,14 @@ def main():
    # See all possible arguments in src/transformers/training_args.py
    # or by passing the --help flag to this script.
    # We now keep distinct sets of args, for a cleaner separation of concerns.
-    parser = RemainArgHfArgumentParser((ModelArguments, DataTrainingArguments, TrainingArguments))
-    if len(sys.argv) == 2 and sys.argv[1].endswith(".json"):
-        # If we pass only one argument to the script and it's the path to a json file,
-        # let's parse it to get our arguments.
-        model_args, data_args, training_args, delta_args = parser.parse_json_file(json_file=os.path.abspath(sys.argv[1]))
-    else:
-        model_args, data_args, training_args, delta_args = parser.parse_args_into_dataclasses(return_remaining_strings=True)
+    parser = RemainArgHfArgumentParser((ModelArguments, DataTrainingArguments, TrainingArguments, DeltaArguments))

+    # You can provide a json file that contains the arguments, and use `--argument some_arg` on the command line to override or append to the json file.
+    json_file, cmd_args = (os.path.abspath(sys.argv[1]), sys.argv[2:]) if sys.argv[1].endswith(".json") else (None, sys.argv[1:])
+    model_args, data_args, training_args, delta_args, remain_args = parser.parse_json_file_with_cmd_args(json_file=json_file, command_line_args=cmd_args)
+    logger.warning("The following arguments not used! {}".format(remain_args))

-    print(f"{training_args.output_dir}/results.json")
+    logger.info(f"The results will be used in {training_args.output_dir}/results.json")
    # exit()
    # Detecting last checkpoint.
    last_checkpoint = None
@ -121,7 +121,8 @@ def main():



-    if os.path.basename(model_args.model_name_or_path).startswith("t5"):
+    if os.path.basename(model_args.model_name_or_path).startswith("t5") \
+        or os.path.basename(model_args.model_name_or_path).startswith("long-t5") :
        from examples_prompt.backbones.t5 import get_backbone, preprocess_function, mask_token_func, get_remove_columns, get_prompts
        from examples_prompt.backbones.t5 import Trainer, DataCollator
    elif  os.path.basename(model_args.model_name_or_path).startswith("blenderbot"):
@ -129,7 +130,9 @@ def main():
        from examples_prompt.backbones.blenderbot import Trainer, DataCollator
    elif os.path.basename(model_args.model_name_or_path).startswith("roberta") \
        or os.path.basename(model_args.model_name_or_path).startswith("bert") \
-          or os.path.basename(model_args.model_name_or_path).startswith("albert") :
+          or os.path.basename(model_args.model_name_or_path).startswith("albert") \
+            or os.path.basename(model_args.model_name_or_path).startswith("xlm-roberta") \
+                or os.path.basename(model_args.model_name_or_path).startswith("deberta") :
        from examples_prompt.backbones.bert import get_backbone, preprocess_function, mask_token_func, get_remove_columns, get_prompts
        from examples_prompt.backbones.bert import Trainer, DataCollator
    elif os.path.basename(model_args.model_name_or_path).startswith("beit"):
@ -144,6 +147,10 @@ def main():
    elif os.path.basename(model_args.model_name_or_path).startswith("clip"):
        from examples_prompt.backbones.clip import get_backbone, preprocess_function, mask_token_func, get_remove_columns, get_prompts
        from examples_prompt.backbones.clip import Trainer, DataCollator
+    elif os.path.basename(model_args.model_name_or_path).startswith("opt") \
+        or os.path.basename(model_args.model_name_or_path).startswith("gpt"):
+        from examples_prompt.backbones.opt import get_backbone, preprocess_function, mask_token_func, get_remove_columns, get_prompts
+        from examples_prompt.backbones.opt import Trainer, DataCollator



@ -161,7 +168,8 @@ def main():

    if delta_args.delta_type.lower() != "none":
        from opendelta import AutoDeltaConfig,AutoDeltaModel
-        delta_config = AutoDeltaConfig.from_dict(vars(delta_args))
+        from dataclasses import asdict
+        delta_config = AutoDeltaConfig.from_dict(asdict(delta_args))
        delta_model = AutoDeltaModel.from_config(delta_config, backbone_model=model)
        delta_model.freeze_module(set_state_dict = True)
        delta_model.log(delta_ratio=True, trainable_ratio=True, visualization=True)
@ -278,14 +286,9 @@ def main():

    if torch.cuda.is_available() and training_args.compute_memory:
        peak_memory = (torch.cuda.max_memory_allocated() / 1024 ** 2)/1000
-        print(
-            "Memory utilization",
-            peak_memory,
-            "GB"
-        )
        performance_metrics.update({"peak_memory": peak_memory})
    if training_args.compute_memory or training_args.compute_time:
-        print("Efficiency Statistics {}".format(performance_metrics))
+        logger.info("Efficiency Statistics {}".format(performance_metrics))
        trainer.save_metrics("performance", performance_metrics)

    # Evaluation
@ -313,17 +316,30 @@ def main():
        trainer.save_metrics(f"{data_args.task_name}_test", metrics)
        all_results['test'][data_args.task_name] = metrics

+    # from opendelta.utils.delta_hub import create_hub_repo_name
+    # from opendelta.utils.delta_center import create_delta_center_args, create_repo_name
+
    # repo_name = create_hub_repo_name(root="DeltaHub",
    #                      dataset=data_args.task_name,
    #                      delta_type = delta_args.delta_type,
    #                      model_name_or_path= model_args.model_name_or_path)
-    # results['repo_name'] = repo_name
-    # if delta_args.delta_type.lower() != "none":
-    #     if training_args.push_to_hub: # TODO add description here
-    #         delta_model.save_finetuned(push_to_hub=True, save_directory=repo_name, use_auth_token=True)
-    #         # trainer.push_to_hub(**kwargs)
-    #     else:
-    #         delta_model.save_finetuned(push_to_hub=False, save_directory=repo_name, use_auth_token=True)
+
+    # center_args =
+    # repo_name = create_repo_name(prefix="", center_args=center_args)
+    # all_results['repo_name'] = repo_name
+
+
+    delta_model.save_finetuned(finetuned_delta_path=delta_args.finetuned_delta_path,
+                               push_to_dc=training_args.push_to_dc,
+                               center_args={"test_performance":all_results['test'][data_args.task_name]['test_average_metrics'],
+                                            },
+                               center_args_pool = {**vars(model_args), **vars(data_args), **vars(training_args), **vars(delta_args)},
+                               list_tags = ['NLI'],
+                               dict_tags = {'purpose':'for testing'},
+                               delay_push=True,
+                               test_result=all_results['test']
+                            )
+


    with open(f"{training_args.output_dir}/results.json", 'w') as fout:
--- a/examples/examples_prompt/src/test.py
+++ b/examples/examples_prompt/src/test.py
@ -0,0 +1,344 @@
+# coding=utf-8
+# Copyright OpenDelta Team and THUNLP lab. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""
+A unified running script for most models to do downstream tasks in a
+prompt learning fashion, i.e., no classification head; all tasks are cast
+to mask prediction or span prediction tasks.
+
+Processing relevant to different backbone models is stored in ../backbones/
+
+A few lines are added to integrate the Delta tuning methods.
+
+You can also adapt this script on your own tasks.
+"""
+
+import os
+import sys
+os.environ['MKL_THREADING_LAYER'] = 'GNU'
+os.environ['MKL_SERVICE_FORCE_INTEL'] = '1'
+os.environ["TOKENIZERS_PARALLELISM"] = "false"
+sys.path.append(os.path.join(os.getcwd(), "../"))
+sys.path.append(os.path.join(os.getcwd()))
+
+import functools
+import logging
+import torch
+import json
+import numpy as np
+
+import transformers
+from transformers import (
+    AutoConfig,
+    AutoModelForMaskedLM,
+    AutoModelForSeq2SeqLM,
+    AutoTokenizer,
+    DataCollatorForSeq2Seq,
+    # HfArgumentParser,
+    # MBartTokenizer,
+    # default_data_collator,
+    Trainer,
+    Seq2SeqTrainer,
+    set_seed,
+)
+from transformers.trainer_utils import is_main_process, get_last_checkpoint
+
+from data_processors import AutoTask #, #TaskDataCollatorForSeq2Seq, AutoPostProcessor, data_collator
+from utils import read_json, save_json
+from utils.args import ModelArguments, TrainingArguments, DataTrainingArguments, RemainArgHfArgumentParser, DeltaArguments
+
+
+logger = logging.getLogger(__name__)
+
+
+def main():
+    parser = RemainArgHfArgumentParser((ModelArguments, DataTrainingArguments, TrainingArguments, DeltaArguments))
+
+    # You can provide a json file that contains the arguments, and use `--argument some_arg` on the command line to override or append to the json file.
+    json_file, cmd_args = (os.path.abspath(sys.argv[1]), sys.argv[2:]) if sys.argv[1].endswith(".json") else (None, sys.argv[1:])
+    model_args, data_args, training_args, delta_args, remain_args = parser.parse_json_file_with_cmd_args(json_file=json_file, command_line_args=cmd_args)
+    logger.warning("The following arguments not used! {}".format(remain_args))
+
+    # # exit()
+    # # Detecting last checkpoint.
+    # last_checkpoint = None
+    # if os.path.isdir(training_args.output_dir) and training_args.do_train and not training_args.overwrite_output_dir:
+    #     last_checkpoint = get_last_checkpoint(training_args.output_dir)
+    #     print("#### last_checkpoint ", last_checkpoint)
+    #     if last_checkpoint is None and len(os.listdir(training_args.output_dir)) > 0:
+    #         '''
+    #         raise ValueError(
+    #             f"Output directory ({training_args.output_dir}) already exists and is not empty. "
+    #             "Use --overwrite_output_dir to overcome."
+    #         )
+    #         '''
+    #         pass
+    #     elif last_checkpoint is not None:
+    #         logger.info(
+    #             f"Checkpoint detected, resuming training at {last_checkpoint}. To avoid this behavior, change "
+    #             "the `--output_dir` or add `--overwrite_output_dir` to train from scratch."
+    #         )
+
+    # Setup logging
+    logging.basicConfig(
+        format="%(asctime)s - %(levelname)s - %(name)s -   %(message)s",
+        datefmt="%m/%d/%Y %H:%M:%S",
+        handlers=[logging.StreamHandler(sys.stdout)],
+    )
+    logger.setLevel(logging.INFO if is_main_process(training_args.local_rank) else logging.WARN)
+
+    # Log on each process the small summary:
+    logger.warning(
+        f"Process rank: {training_args.local_rank}, device: {training_args.device}, n_gpu: {training_args.n_gpu}"
+        + f"distributed training: {bool(training_args.local_rank != -1)}, 16-bits training: {training_args.fp16}"
+    )
+    # Set the verbosity to info of the Transformers logger (on main process only):
+    if is_main_process(training_args.local_rank):
+        transformers.utils.logging.set_verbosity_info()
+    # logger.info("Training/evaluation parameters %s", training_args, model_args, data_args, delta_args)
+    logger.info("{}\n{}\n{}\n{}".format(training_args, model_args, data_args, delta_args))
+
+
+    # Set seed before initializing model.
+    set_seed(training_args.seed)
+
+
+
+    if os.path.basename(model_args.model_name_or_path).startswith("t5"):
+        from examples_prompt.backbones.t5 import get_backbone, preprocess_function, mask_token_func, get_remove_columns, get_prompts
+        from examples_prompt.backbones.t5 import Trainer, DataCollator
+    elif  os.path.basename(model_args.model_name_or_path).startswith("blenderbot"):
+        from examples_prompt.backbones.blenderbot import get_backbone, preprocess_function, mask_token_func, get_remove_columns, get_prompts
+        from examples_prompt.backbones.blenderbot import Trainer, DataCollator
+    elif os.path.basename(model_args.model_name_or_path).startswith("roberta") \
+        or os.path.basename(model_args.model_name_or_path).startswith("bert") \
+          or os.path.basename(model_args.model_name_or_path).startswith("albert") :
+        from examples_prompt.backbones.bert import get_backbone, preprocess_function, mask_token_func, get_remove_columns, get_prompts
+        from examples_prompt.backbones.bert import Trainer, DataCollator
+    elif os.path.basename(model_args.model_name_or_path).startswith("beit"):
+        from examples_prompt.backbones.beit import get_backbone, preprocess_function, mask_token_func, get_remove_columns, get_prompts
+        from examples_prompt.backbones.beit import Trainer, DataCollator
+    elif os.path.basename(model_args.model_name_or_path).startswith("bart"):
+        from examples_prompt.backbones.bart import get_backbone, preprocess_function, mask_token_func, get_remove_columns, get_prompts
+        from examples_prompt.backbones.bart import Trainer, DataCollator
+    elif os.path.basename(model_args.model_name_or_path).startswith("bigbird"):
+        from examples_prompt.backbones.bigbird import get_backbone, preprocess_function, mask_token_func, get_remove_columns, get_prompts
+        from examples_prompt.backbones.bigbird import Trainer, DataCollator
+    elif os.path.basename(model_args.model_name_or_path).startswith("clip"):
+        from examples_prompt.backbones.clip import get_backbone, preprocess_function, mask_token_func, get_remove_columns, get_prompts
+        from examples_prompt.backbones.clip import Trainer, DataCollator
+
+
+
+    config, tokenizer, model = get_backbone(model_args=model_args)
+
+    # model parallelize
+    if hasattr(training_args, "model_parallel") and training_args.model_parallel:
+        logger.info('parallelize model!')
+        model.parallelize()
+
+    from opendelta import Visualization
+    Visualization(model).structure_graph()
+
+    if delta_args.delta_type.lower() != "none":
+        from opendelta.delta_models.adapter import AdapterConfig, AdapterModel
+        delta_config = AdapterConfig.from_finetuned(finetuned_delta_path=delta_args.finetuned_delta_path)
+        delta_model = AdapterModel.from_finetuned(finetuned_delta_path=delta_args.finetuned_delta_path,
+                    delta_config=delta_config,
+                    backbone_model=model,
+                    force_download=delta_args.force_download,
+                    cache_dir=delta_args.delta_cache_dir)
+        # delta_model.freeze_module(set_state_dict = True)
+        delta_model.log(delta_ratio=True, trainable_ratio=True, visualization=True)
+
+
+    performance_metrics = {}
+
+
+
+
+    non_empty_splits_names = []
+    # if training_args.do_train:
+    #     non_empty_splits_names.append("train")
+    # if training_args.do_eval:
+    #     non_empty_splits_names.append("eval")
+    if training_args.do_test:
+        non_empty_splits_names.append("test")
+    splits = {}
+    for split_name in ['test']:
+        if split_name not in non_empty_splits_names:
+            splits[split_name] = None
+            continue
+
+        task = AutoTask.get(data_args.task_name,
+                            data_args.dataset_config_name,
+                            data_args=data_args,
+                            seed=data_args.data_sample_seed)
+
+        dataset =  task.get(split=split_name,
+                            split_validation_test=training_args.split_validation_test,
+                            n_obs=data_args.max_train_samples)
+
+
+
+        template, _verbalizer, tokenizer_wrapper = get_prompts(task, tokenizer, data_args)
+
+
+        dataset = dataset.map(
+                            functools.partial(preprocess_function,
+                                            data_args=data_args,
+                                            tokenizer=tokenizer,
+                                            template=template,
+                                            verbalizer=_verbalizer,
+                                            tokenizer_wrapper=tokenizer_wrapper,
+                                            split=split_name),
+                            batched=False,
+                            num_proc=data_args.preprocessing_num_workers,
+                            remove_columns=get_remove_columns(list(dataset.features.keys())),
+                            load_from_cache_file=not data_args.overwrite_cache,
+                        )
+        # from IPython import embed; embed()
+        splits[split_name] = dataset
+        if split_name == "test":
+            eval_task = task
+            verbalizer = _verbalizer
+
+
+
+    trainer = Trainer(
+        model=model,
+        verbalizer=verbalizer,
+        eval_task=eval_task,
+        args=training_args,
+        # train_dataset=splits['train'],
+        # eval_dataset=splits['eval'],
+        tokenizer=tokenizer,
+        data_collator=DataCollator(tokenizer),
+    )
+
+
+    def save_training_config(config_file, output_dir):
+        json_data = read_json(config_file)
+        save_json(os.path.join(output_dir, "training_config.json"), json_data)
+
+
+    # Saves training config.
+    if trainer.is_world_process_zero():
+        save_training_config(sys.argv[1], training_args.output_dir)
+
+    # # Training
+    # if training_args.do_train:
+    #     checkpoint = None
+    #     if training_args.resume_from_checkpoint is not None:
+    #         checkpoint = training_args.resume_from_checkpoint
+    #     elif last_checkpoint is not None:
+    #         checkpoint = last_checkpoint
+
+    #     if training_args.compute_time:
+    #         torch.cuda.synchronize()  # wait for move to complete
+    #         start = torch.cuda.Event(enable_timing=True)
+    #         end = torch.cuda.Event(enable_timing=True)
+    #         start.record()
+
+    #     train_result = trainer.train(resume_from_checkpoint=checkpoint)
+
+    #     if training_args.compute_time:
+    #         end.record()
+    #         torch.cuda.synchronize()  # wait for all_reduce to complete
+    #         total_time = start.elapsed_time(end)/(1000*60)
+    #         performance_metrics.update({"total_time in minutes ": total_time})
+
+    #     trainer.save_model()  # Saves the tokenizer too for easy upload
+    #     train_metrics = train_result.metrics
+    #     max_train_samples = (
+    #         data_args.max_train_samples if data_args.max_train_samples is not None else len(splits['train'])
+    #     )
+    #     train_metrics["train_samples"] = min(max_train_samples, len(splits['train']))
+    #     trainer.log_metrics("train", train_metrics)
+    #     trainer.save_metrics("train", train_metrics)
+    #     trainer.save_state()
+
+    # if torch.cuda.is_available() and training_args.compute_memory:
+    #     peak_memory = (torch.cuda.max_memory_allocated() / 1024 ** 2)/1000
+    #     print(
+    #         "Memory utilization",
+    #         peak_memory,
+    #         "GB"
+    #     )
+    #     performance_metrics.update({"peak_memory": peak_memory})
+    # if training_args.compute_memory or training_args.compute_time:
+    #     print("Efficiency Statistics {}".format(performance_metrics))
+    #     trainer.save_metrics("performance", performance_metrics)
+
+    # Evaluation
+    all_results = {}
+
+    # all_results['evaluate'] = {}
+
+    # if training_args.do_eval:
+    #     logger.info("*** Evaluate ***")
+
+    #     metrics = trainer.evaluate(eval_dataset=splits['eval'],
+    #     )
+    #     trainer.log_metrics(f"{data_args.task_name}_eval", metrics)
+    #     trainer.save_metrics(f"{data_args.task_name}_eval", metrics)
+    #     all_results['evaluate'][data_args.task_name] = metrics
+
+    # Test
+    all_results['test'] = {}
+    if training_args.do_test:
+        logger.info("*** Test ***")
+        metrics = trainer.evaluate(eval_dataset=splits['test'],
+        metric_key_prefix="test"
+        )
+        trainer.log_metrics(f"{data_args.task_name}_test", metrics)
+        trainer.save_metrics(f"{data_args.task_name}_test", metrics)
+        all_results['test'][data_args.task_name] = metrics
+
+    # from opendelta.utils.delta_hub import create_hub_repo_name
+    # from opendelta.utils.delta_center import create_delta_center_args, create_repo_name
+
+    # repo_name = create_hub_repo_name(root="DeltaHub",
+    #                      dataset=data_args.task_name,
+    #                      delta_type = delta_args.delta_type,
+    #                      model_name_or_path= model_args.model_name_or_path)
+
+    # center_args =
+    # repo_name = create_repo_name(prefix="", center_args=center_args)
+    # all_results['repo_name'] = repo_name
+
+
+    # delta_model.save_finetuned(push_to_hf=training_args.push_to_hf,
+    #                            push_to_dc=training_args.push_to_dc,
+    #                            center_args={},
+    #                            center_args_pool = {**vars(model_args), **vars(data_args), **vars(training_args), **vars(delta_args)},
+    #                            delay_push=True,
+    #                         )
+
+    print(all_results)
+
+
+
+    # with open(f"{training_args.output_dir}/results.json", 'w') as fout:
+    #     string = json.dumps(all_results, indent=4,sort_keys=True)
+    #     fout.write(string+"\n")
+
+    return all_results
+
+
+
+
+if __name__ == "__main__":
+    result = main()
+
--- a/examples/examples_prompt/utils/args.py
+++ b/examples/examples_prompt/utils/args.py
@ -1,6 +1,10 @@
 from dataclasses import dataclass, field
 from typing import Optional, List
 from transformers import HfArgumentParser
+from pathlib import Path
+import sys
+
+

@dataclass
 class ModelArguments:
@ -81,6 +85,10 @@ class TrainingArguments(HfTrainingArguments):
    remove_unused_columns: Optional[bool] = field(
        default=False, metadata={"help": "Remove columns not required by the model when using an nlp.Dataset."}
    )
+    push_to_hf: Optional[bool] = field(default=False, metadata={"help": "Push the model to huggingface model hub."})
+    push_to_dc: Optional[bool] = field(default=True, metadata={"help": "Push the model to delta center."})
+
+



@ -211,28 +219,254 @@ class DataTrainingArguments:
            self.test_max_target_length = self.max_target_length


+
+import dataclasses
+
+@dataclass
+class DeltaArguments:
+    """
+    Arguments pertaining to the delta model: its type, the backbone model, the modified/unfrozen modules,
+    and the options for loading, caching and pushing the finetuned delta checkpoint.
+    """
+    delta_type: str= field(default="", metadata={"help": "the type of delta"})
+    backbone_model: Optional[str] = field(
+        default="", metadata={"help": "the backbone model"}
+    )
+    model_path_public: Optional[str] = field(
+        default="", metadata={"help": "the path (url) of the publicly available backbone model"}
+    )
+    modified_modules: Optional[List[str]] = field(
+        default_factory=lambda: None, metadata={"help": "the modules inside the backbone to be modified"}
+    )
+    unfrozen_modules: Optional[List[str]] = field(
+        default_factory=lambda:["deltas"], metadata={"help": "the modules inside the backbone or in the delta modules that need to be unfrozen"}
+    )
+    finetuned_delta_path: Optional[str] = field(
+        default=None, metadata={"help": "the path of the finetuned delta model"}
+    )
+    force_download: Optional[bool] = field(
+        default=False, metadata={"help": "whether to download the checkpoint form delta center no matter whether it exists"}
+    )
+    local_files_only: Optional[bool] = field(
+        default=False, metadata={"help": "whether not to look for file in delta center"}
+    )
+    delta_cache_dir: Optional[str] = field(
+        default=None, metadata={"help": "The cache path defined by user. If not set, we will firstly look into the"+
+        " working directory and then into the default cache path (ususally ~/.cache/delta_center)."}
+    )
+    delay_push: Optional[bool] = field(
+        default=True, metadata={
+            'help':'whether push the checkpoint to delta center later.'
+        }
+    )
+
+    def merge_arguments(self, objb):
+        print(objb)
+        self.__class__ = dataclasses.make_dataclass('DeltaArgument', fields=[(s.name, s.type, getattr(objb, s.name)) for s in dataclasses.fields(objb)], bases=(DeltaArguments,))
+
+
+
+
+@dataclass
+class AdapterArguments:
+    bottleneck_dim: Optional[int] = field(
+        default=24, metadata={"help": "the dimension of the bottleneck layer"}
+    )
+@dataclass
+class LoRAArguments:
+    lora_r: Optional[int] = field(
+        default=8, metadata={"help": "the rank of the LoRA metrics."}
+    )
+@dataclass
+class PrefixArguments:
+    pass
+@dataclass
+class BitFitArguments:
+    pass
+@dataclass
+class SoftPromptArguments:
+    soft_token_num: Optional[int] = field(
+        default=100, metadata={"help": "the num of soft tokens."}
+    )
+
+@dataclass
+class CompacterArguments:
+    pass
+@dataclass
+class LowRankAdapterArguments:
+    pass
+
+# from opendelta.delta_models.adapter import AdapterConfig
+# from opendelta.delta_models.bitfit import BitFitConfig
+# from opendelta.delta_models.compacter import CompacterConfig
+# from opendelta.delta_models.lora import LoraArguments
+# from opendelta.delta_models.low_rank_adapter import LowRankAdapterConfig
+# from opendelta.delta_models.prefix import PrefixConfig
+# from opendelta.delta_models.soft_prompt import SoftPromptConfig
+# DELTAARGMAP = {
+#     "adapter": AdapterConfig,
+#     "lora":LoraArguments,
+#     "prefix":PrefixConfig,
+#     "bitfit":BitFitConfig,
+#     "soft_prompt":SoftPromptConfig,
+#     "compacter":CompacterConfig,
+#     "low_rank_adapter":LowRankAdapterConfig
+
+# }
+
+DELTAARGMAP = {
+    "adapter": AdapterArguments,
+    "lora":LoRAArguments,
+    "prefix":PrefixArguments,
+    "bitfit":BitFitArguments,
+    "soft_prompt":SoftPromptArguments,
+    "compacter":CompacterArguments,
+    "low_rank_adapter":LowRankAdapterArguments
+
+}
+
+# TODO: add more specific delta arguments
+
+
+
 class RemainArgHfArgumentParser(HfArgumentParser):
-    def parse_json_file(self, json_file: str, return_remaining_args=True ):
+    '''This is a more powerful version of argument parser.
+    It can receive both command line arguments and json file arguments.
+    The command line arguments will override the json file arguments.
+    The parser will load the specific delta arguments (e.g. Adapter's)
+    according to the delta_type argument. And merge the specific delta arguments
+    with the common delta arguments.
+    '''
+    def parse_json_file_with_cmd_args(self, json_file: str, command_line_args=None, return_remaining_args=True ):
-        """
-        Alternative helper method that does not use `argparse` at all, instead loading a json file and populating the
-        dataclass types.
-        """
-        import argparse
+        """
+        Load arguments from a json file, combine them with command line arguments, and populate the dataclass types.
+
+        Every key of the json file is turned into a ``--key value`` pair (list values become ``--key v1 v2 ...``)
+        unless ``--key`` already appears in ``command_line_args``. The command line arguments are appended after
+        the json-derived ones and the result is handed to :meth:`parse_args_into_dataclasses`.
+
+        Args:
+            json_file: Path of the json file holding the arguments.
+            command_line_args: List of command line argument strings. ``None`` is treated as an empty list.
+            return_remaining_args: If true, the argument strings left unparsed are returned as the last element.
+
+        Returns:
+            A list of the parsed objects without the last one (which is merged into every ``DeltaArguments``
+            instance instead), followed by the remaining argument strings when ``return_remaining_args`` is true.
+        """
         import json
         from pathlib import Path
-        import dataclasses

         data = json.loads(Path(json_file).read_text())
+
+        if command_line_args is None:
+            command_line_args = []
+
+        # Build the argument vector token by token. Keeping every value as its own list element (rather than
+        # joining everything into one string and splitting on whitespace) preserves values that contain spaces.
+        data_list = []
+        for key, value in data.items():
+            flag = "--" + key
+            if flag in command_line_args:
+                # The command line takes precedence over the json file for this key.
+                continue
+            data_list.append(flag)
+            if isinstance(value, list):
+                data_list.extend(str(elem) for elem in value)
+            else:
+                data_list.append(str(value))
+        data_list += command_line_args
+
+        parsed = self.parse_args_into_dataclasses(args=data_list, return_remaining_strings=return_remaining_args)
+        if return_remaining_args:
+            outputs, remain_args = parsed
+        else:
+            outputs, remain_args = parsed, None
+
+        # Merge the delta-type specific arguments into the common delta arguments.
+        # NOTE(review): assumes outputs[-1] is the delta-type specific dataclass - confirm that
+        # parse_args_into_dataclasses cannot append an extra namespace object after it.
+        for d in outputs:
+            if isinstance(d, DeltaArguments):
+                d.merge_arguments(outputs[-1])
+
+        if return_remaining_args:
+            return [*(outputs[:-1]), remain_args]
+        return [*(outputs[:-1])]
+
+    def parse_args_into_dataclasses(
+        self, args=None, return_remaining_strings=False, look_for_args_file=True, args_filename=None
+    ):
+        """
+        Parse command-line args into instances of the specified dataclass types.
+
+        This relies on argparse's `ArgumentParser.parse_known_args`. See the doc at:
+        docs.python.org/3.7/library/argparse.html#argparse.ArgumentParser.parse_args
+
+        The arguments are parsed twice: the first pass is only used to read ``delta_type``; the dataclass mapped
+        to it in ``DELTAARGMAP`` is then registered on the parser and the arguments are parsed again.
+
+        Args:
+            args:
+                List of strings to parse. The default is taken from sys.argv. (same as argparse.ArgumentParser)
+            return_remaining_strings:
+                If true, also return a list of remaining argument strings.
+            look_for_args_file:
+                If true, will look for a ".args" file with the same base name as the entry point script for this
+                process, and will place its potential content before the command line args.
+            args_filename:
+                If not None, will use this file instead of the ".args" file specified in the previous argument.
+
+        Returns:
+            If ``return_remaining_strings`` is true, a tuple ``(outputs, remaining_args)``; otherwise ``outputs``
+            alone, where ``outputs`` is a list consisting of:
+
+                - the dataclass instances in the same order as they were passed to the initializer, followed by
+                  the instance of the dataclass selected by ``delta_type``.
+                - if applicable, an additional namespace for more (non-dataclass backed) arguments added to the parser
+                  after initialization.
+
+            and ``remaining_args`` is the list of remaining argument strings (same as
+            argparse.ArgumentParser.parse_known_args).
+
+        Raises:
+            ValueError: If ``return_remaining_strings`` is false and some argument strings were not consumed.
+        """
+        if args_filename or (look_for_args_file and len(sys.argv)):
+            if args_filename:
+                args_file = Path(args_filename)
+            else:
+                args_file = Path(sys.argv[0]).with_suffix(".args")
+
+            if args_file.exists():
+                fargs = args_file.read_text().split()
+                args = fargs + args if args is not None else fargs + sys.argv[1:]
+                # The file arguments are placed before the explicit ones. With argparse's default
+                # "store" action a later occurrence of an option overrides an earlier one, so the
+                # explicit arguments take precedence over the file.
+        namespace, remaining_args = self.parse_known_args(args=args)
+
+        # conditionally add delta arguments
+        # NOTE(review): this appends to self.dataclass_types and registers new parser arguments on every
+        # call, so the parser is mutated; calling this method twice on the same parser looks unsafe - confirm.
+        deltatype_args = DELTAARGMAP[namespace.delta_type]
+        self.dataclass_types.append(deltatype_args)
+        self._add_dataclass_arguments(deltatype_args)
+
+        # parse the arguments again, this time with the specific delta type's arguments
+        namespace, remaining_args = self.parse_known_args(args=args)
+
+
        outputs = []
        for dtype in self.dataclass_types:
            keys = {f.name for f in dataclasses.fields(dtype) if f.init}
-            inputs = {k: data.pop(k) for k in list(data.keys()) if k in keys}
+            inputs = {k: v for k, v in vars(namespace).items() if k in keys}
+            # Remove the consumed fields so that only non-dataclass attributes remain on the namespace.
+            for k in keys:
+                delattr(namespace, k)
            obj = dtype(**inputs)
            outputs.append(obj)
-
-        remain_args = argparse.ArgumentParser()
-        remain_args.__dict__.update(data)
-        if return_remaining_args:
-            return (*outputs, remain_args)
+        if len(namespace.__dict__) > 0:
+            # Attributes not claimed by any dataclass are returned as an additional namespace.
+            outputs.append(namespace)
+        if return_remaining_strings:
+            return (outputs, remaining_args)
        else:
-            return (*outputs,)
+            if remaining_args:
+                raise ValueError(f"Some specified arguments are not used by the HfArgumentParser: {remaining_args}")
+
+            return outputs
+
+        # namespace, remaining_args = self.parse_known_args(args=data_list)
+
+        # print("Here", command_line_args, data_list,namespace, remaining_args)
+        # data.update(remain_args)
+
+        # outputs = []
+        # for dtype in self.dataclass_types:
+        #     keys = {f.name for f in dataclasses.fields(dtype) if f.init}
+        #     inputs = {k: namespace.get(k) for k in list(data.keys()) if k in keys}
+        #     obj = dtype(**inputs)
+        #     outputs.append(obj)
+
+        # # remain_args = argparse.ArgumentParser()
+        # remain_args.__dict__.update(remain_args)
+        # if return_remaining_args:
+        #     return (*outputs, remain_args)
+        # else:
+        #     return (*outputs,)
+
+
--- a/examples/legacies/examples_seq2seq/README.md
+++ b/examples/legacies/examples_seq2seq/README.md
--- a/examples/legacies/examples_seq2seq/init.py
+++ b/examples/legacies/examples_seq2seq/init.py
--- a/examples/legacies/examples_seq2seq/configs/config_gen_bs.py
+++ b/examples/legacies/examples_seq2seq/configs/config_gen_bs.py
--- a/examples/legacies/examples_seq2seq/data_processors/init.py
+++ b/examples/legacies/examples_seq2seq/data_processors/init.py
--- a/examples/legacies/examples_seq2seq/data_processors/data_collator.py
+++ b/examples/legacies/examples_seq2seq/data_processors/data_collator.py
--- a/examples/legacies/examples_seq2seq/data_processors/postprocessors.py
+++ b/examples/legacies/examples_seq2seq/data_processors/postprocessors.py
--- a/examples/legacies/examples_seq2seq/data_processors/tasks.py
+++ b/examples/legacies/examples_seq2seq/data_processors/tasks.py
--- a/examples/legacies/examples_seq2seq/data_processors/utils.py
+++ b/examples/legacies/examples_seq2seq/data_processors/utils.py
--- a/examples/legacies/examples_seq2seq/metrics/init.py
+++ b/examples/legacies/examples_seq2seq/metrics/init.py
--- a/examples/legacies/examples_seq2seq/metrics/metrics.py
+++ b/examples/legacies/examples_seq2seq/metrics/metrics.py
--- a/examples/legacies/examples_seq2seq/metrics/qa_utils.py
+++ b/examples/legacies/examples_seq2seq/metrics/qa_utils.py
--- a/examples/legacies/examples_seq2seq/run_seq2seq.py
+++ b/examples/legacies/examples_seq2seq/run_seq2seq.py
--- a/examples/legacies/examples_seq2seq/seq2seq_trainer.py
+++ b/examples/legacies/examples_seq2seq/seq2seq_trainer.py
--- a/examples/legacies/examples_seq2seq/trainers/init.py
+++ b/examples/legacies/examples_seq2seq/trainers/init.py
--- a/examples/legacies/examples_seq2seq/trainers/model_args.py
+++ b/examples/legacies/examples_seq2seq/trainers/model_args.py
--- a/examples/legacies/examples_seq2seq/trainers/seq2seq_trainer.py
+++ b/examples/legacies/examples_seq2seq/trainers/seq2seq_trainer.py
--- a/examples/legacies/examples_seq2seq/trainers/trainer.py
+++ b/examples/legacies/examples_seq2seq/trainers/trainer.py
--- a/examples/legacies/examples_seq2seq/trainers/trainer_args.py
+++ b/examples/legacies/examples_seq2seq/trainers/trainer_args.py
--- a/examples/legacies/examples_seq2seq/trainers/trainer_utils.py
+++ b/examples/legacies/examples_seq2seq/trainers/trainer_utils.py
--- a/examples/legacies/examples_seq2seq/utils/init.py
+++ b/examples/legacies/examples_seq2seq/utils/init.py
--- a/examples/legacies/examples_seq2seq/utils/utils.py
+++ b/examples/legacies/examples_seq2seq/utils/utils.py
--- a/examples/legacies/examples_text-classification/README.md
+++ b/examples/legacies/examples_text-classification/README.md
--- a/examples/legacies/examples_text-classification/configs/config_gen.py
+++ b/examples/legacies/examples_text-classification/configs/config_gen.py
--- a/examples/legacies/examples_text-classification/configs/lora_roberta-base/lora_cola.json
+++ b/examples/legacies/examples_text-classification/configs/lora_roberta-base/lora_cola.json
--- a/examples/legacies/examples_text-classification/configs/lora_roberta-base/lora_mnli.json
+++ b/examples/legacies/examples_text-classification/configs/lora_roberta-base/lora_mnli.json
--- a/examples/legacies/examples_text-classification/configs/lora_roberta-base/lora_mrpc.json
+++ b/examples/legacies/examples_text-classification/configs/lora_roberta-base/lora_mrpc.json
--- a/examples/legacies/examples_text-classification/configs/lora_roberta-base/lora_qnli.json
+++ b/examples/legacies/examples_text-classification/configs/lora_roberta-base/lora_qnli.json
--- a/examples/legacies/examples_text-classification/configs/lora_roberta-base/lora_qqp.json
+++ b/examples/legacies/examples_text-classification/configs/lora_roberta-base/lora_qqp.json
--- a/examples/legacies/examples_text-classification/configs/lora_roberta-base/lora_rte.json
+++ b/examples/legacies/examples_text-classification/configs/lora_roberta-base/lora_rte.json
--- a/examples/legacies/examples_text-classification/configs/lora_roberta-base/lora_sst2.json
+++ b/examples/legacies/examples_text-classification/configs/lora_roberta-base/lora_sst2.json
--- a/examples/legacies/examples_text-classification/configs/lora_roberta-base/lora_stsb.json
+++ b/examples/legacies/examples_text-classification/configs/lora_roberta-base/lora_stsb.json
--- a/examples/legacies/examples_text-classification/configs/lora_roberta-base/lora_wnli.json
+++ b/examples/legacies/examples_text-classification/configs/lora_roberta-base/lora_wnli.json
--- a/examples/legacies/examples_text-classification/configs/prefix_roberta-base/cola.json
+++ b/examples/legacies/examples_text-classification/configs/prefix_roberta-base/cola.json
--- a/examples/legacies/examples_text-classification/configs/prefix_roberta-base/mnli.json
+++ b/examples/legacies/examples_text-classification/configs/prefix_roberta-base/mnli.json
--- a/examples/legacies/examples_text-classification/configs/prefix_roberta-base/mrpc.json
+++ b/examples/legacies/examples_text-classification/configs/prefix_roberta-base/mrpc.json
--- a/examples/legacies/examples_text-classification/configs/prefix_roberta-base/qnli.json
+++ b/examples/legacies/examples_text-classification/configs/prefix_roberta-base/qnli.json
--- a/examples/legacies/examples_text-classification/configs/prefix_roberta-base/qqp.json
+++ b/examples/legacies/examples_text-classification/configs/prefix_roberta-base/qqp.json
--- a/examples/legacies/examples_text-classification/configs/prefix_roberta-base/rte.json
+++ b/examples/legacies/examples_text-classification/configs/prefix_roberta-base/rte.json
--- a/examples/legacies/examples_text-classification/configs/prefix_roberta-base/sst2.json
+++ b/examples/legacies/examples_text-classification/configs/prefix_roberta-base/sst2.json
--- a/examples/legacies/examples_text-classification/configs/prefix_roberta-base/stsb.json
+++ b/examples/legacies/examples_text-classification/configs/prefix_roberta-base/stsb.json
--- a/examples/legacies/examples_text-classification/configs/prefix_roberta-base/superglue-boolq.json
+++ b/examples/legacies/examples_text-classification/configs/prefix_roberta-base/superglue-boolq.json
--- a/examples/legacies/examples_text-classification/configs/prefix_roberta-base/superglue-cb.json
+++ b/examples/legacies/examples_text-classification/configs/prefix_roberta-base/superglue-cb.json
--- a/examples/legacies/examples_text-classification/configs/prefix_roberta-base/superglue-copa.json
+++ b/examples/legacies/examples_text-classification/configs/prefix_roberta-base/superglue-copa.json
--- a/examples/legacies/examples_text-classification/configs/prefix_roberta-base/superglue-multirc.json
+++ b/examples/legacies/examples_text-classification/configs/prefix_roberta-base/superglue-multirc.json
--- a/examples/legacies/examples_text-classification/configs/prefix_roberta-base/superglue-record.json
+++ b/examples/legacies/examples_text-classification/configs/prefix_roberta-base/superglue-record.json
--- a/examples/legacies/examples_text-classification/configs/prefix_roberta-base/superglue-wic.json
+++ b/examples/legacies/examples_text-classification/configs/prefix_roberta-base/superglue-wic.json
--- a/examples/legacies/examples_text-classification/configs/prefix_roberta-base/superglue-wsc.fixed.json
+++ b/examples/legacies/examples_text-classification/configs/prefix_roberta-base/superglue-wsc.fixed.json
--- a/examples/legacies/examples_text-classification/metrics/glue.py
+++ b/examples/legacies/examples_text-classification/metrics/glue.py
--- a/examples/legacies/examples_text-classification/requirements.txt
+++ b/examples/legacies/examples_text-classification/requirements.txt
--- a/examples/legacies/examples_text-classification/run.sh
+++ b/examples/legacies/examples_text-classification/run.sh
--- a/examples/legacies/examples_text-classification/run_glue.py
+++ b/examples/legacies/examples_text-classification/run_glue.py
--- a/examples/legacies/examples_text-classification/util.py
+++ b/examples/legacies/examples_text-classification/util.py
--- a/examples/legacies/setup_seq2seq.py
+++ b/examples/legacies/setup_seq2seq.py
--- a/examples/setup_prompt.py
+++ b/examples/setup_prompt.py
@ -1,48 +0,0 @@
-"""Install Compacter."""
-import os 
-import setuptools
-from torch.utils.cpp_extension import BuildExtension, CUDAExtension
-
-#os.environ['TORCH_CUDA_ARCH_LIST']="3.5;3.7;6.1;7.0;7.5;8.6+PTX"
-
-def setup_package():
-  long_description = "examples_prompt"
-  setuptools.setup(
-      name='examples_prompt',
-      version='0.0.1',
-      description='textual prompt example',
-      long_description=long_description,
-      long_description_content_type='text/markdown',
-      author='Shengding Hu',
-      license='MIT License',
-      packages=setuptools.find_packages(
-          exclude=['docs', 'tests', 'scripts']),
-      dependency_links=[
-          'https://download.pytorch.org/whl/torch_stable.html',
-      ],
-      classifiers=[
-          'Intended Audience :: Developers',
-          'Intended Audience :: Science/Research',
-          'License :: OSI Approved :: MIT License',
-          'Topic :: Scientific/Engineering :: Artificial Intelligence',
-          'Programming Language :: Python :: 3',
-          'Programming Language :: Python :: 3.7.10',
-      ],
-      keywords='text nlp machinelearning',
-      # ext_modules=[
-      #   CUDAExtension('seq2seq.projections.fwh_cuda',
-      #       sources=[
-      #       'seq2seq/projections/fwh_cuda/fwh_cpp.cpp',
-      #       'seq2seq/projections/fwh_cuda/fwh_cu.cu',
-      #       ]
-      #   )
-      # ]
-      # ,
-      cmdclass={"build_ext": BuildExtension},
-      install_requires=[
-      ],
-  )
-
-
-if __name__ == '__main__':
-  setup_package()
--- a/opendelta/init.py
+++ b/opendelta/init.py
@ -1,5 +1,5 @@

-__version__ = "0.1.0"
+__version__ = "0.2.4"

 class GlobalSetting:
    def __init__(self):
--- a/opendelta/auto_delta.py
+++ b/opendelta/auto_delta.py
@ -2,16 +2,14 @@ from copy import deepcopy
 from typing import Any, Dict, OrderedDict
 from opendelta.utils.visualization import Visualization
 import torch.nn as nn
-from transformers.file_utils import PushToHubMixin
 from opendelta.utils.logging import get_logger
 import importlib
 from opendelta.delta_configs import BaseDeltaConfig
-from opendelta.basemodel import DeltaBase
 logger = get_logger(__name__)


 DELTA_CONFIG_MAPPING = {
-    "lora": "LoraConfig", 
+    "lora": "LoraConfig",
    "low_rank_adapter": "LowRankAdapterConfig",
    "bitfit": "BitFitConfig",
    "adapter":"AdapterConfig",
@ -91,18 +89,18 @@ class AutoDeltaConfig:
            "AutoConfig is designed to be instantiated "
            "using the ``AutoConfig.from_pretrained(pretrained_model_name_or_path)`` method."
        )
-    
+
    @classmethod
    def from_dict(cls, config_dict: Dict[str, Any], **kwargs):
-        r""" Instantiate a DeltaConfig according to the dict. Automatically load the config specified by 
+        r""" Instantiate a DeltaConfig according to the dict. Automatically load the config specified by
        :obj:`delta_type`.

        Args:
            config_dict (:obj:`dict`): The dict of configs of delta model.
-            kwargs: Other keyword argument pass to initialize the config. 
+            kwargs: Other keyword argument pass to initialize the config.

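-        >>> config = AutoDeltaConfig.from_dict({"delta_type":"lora"}) # This will load the dault lora config.
+        >>> config = AutoDeltaConfig.from_dict({"delta_type":"lora"}) # This will load the default lora config.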
-        >>> config = AutoDeltaConfig.from_dict({"delta_type":"lora", "lora_r":5}) # Will load the default lora config, with lora_r = 5 
+        >>> config = AutoDeltaConfig.from_dict({"delta_type":"lora", "lora_r":5}) # Will load the default lora config, with lora_r = 5

        """
        config_dict = deepcopy(config_dict)
@ -114,7 +112,7 @@ class AutoDeltaConfig:


    @classmethod
-    def from_finetuned(cls, finetuned_model_name_or_path, **kwargs):
+    def from_finetuned(cls, finetuned_delta_path, **kwargs):
        r"""
        Instantiate one of the configuration classes of the library from a finetuned delta model configuration.
        The configuration class to instantiate is selected based on the ``delta_type`` property of the config object that
@ -122,18 +120,18 @@ class AutoDeltaConfig:

        Parameters:

-            finetuned_model_name_or_path (:obj:`str` or :obj:`os.PathLike`, *optional*): 
+            finetuned_delta_path (:obj:`str` or :obj:`os.PathLike`, *optional*):
                Can be either:

                - A string, the *model id* of a finetuned delta model configuration hosted inside a model repo on
                  huggingface.co. Valid model ids can be located at the root-level, like ``Davin/lora``, or
                  namespaced under a user or organization name, like ``DeltaHub/lora_t5-base_mrpc``.
                - A path to a *directory* containing a configuration file saved using the
-                  :py:meth:`DeltaBase.save_finetuned` method, 
+                  :py:meth:`DeltaBase.save_finetuned` method,
                  e.g., ``./my_model_directory/``.
                - A path or url to a saved configuration JSON *file*, e.g.,
                  ``./my_model_directory/configuration.json``.
-                The last two option are not tested but inherited from huggingface. 
+                The last two option are not tested but inherited from huggingface.
            cache_dir (:obj:`str` or :obj:`os.PathLike`, *optional*):
                Path to a directory in which a downloaded pretrained model configuration should be cached if the
                standard cache should not be used.
@ -163,9 +161,9 @@ class AutoDeltaConfig:
                The values in kwargs of any keys which are configuration attributes will be used to override the loaded
                values. Behavior concerning key/value pairs whose keys are *not* configuration attributes is controlled
                by the ``return_unused_kwargs`` keyword parameter.
-        
+
        Examples:
-        
+
        .. code-block:: python

            from transformers import AutoConfig
@ -173,25 +171,24 @@ class AutoDeltaConfig:

        """

-        kwargs["name_or_path"] = finetuned_model_name_or_path

-        config_dict, _ = BaseDeltaConfig.get_config_dict(finetuned_model_name_or_path, **kwargs)
+        config_dict, kwargs = BaseDeltaConfig.get_config_dict(finetuned_delta_path, **kwargs)
        if "delta_type" in config_dict:
            config_class = LAZY_CONFIG_MAPPING[config_dict["delta_type"]]
            return config_class.from_dict(config_dict, **kwargs)
        else:
            # Fallback: use pattern matching on the string.
            for pattern, config_class in LAZY_CONFIG_MAPPING.items():
-                if pattern in str(finetuned_model_name_or_path):
+                if pattern in str(finetuned_delta_path):
                    return config_class.from_dict(config_dict, **kwargs)

        raise ValueError(
-            f"Unrecognized model in {finetuned_model_name_or_path}. "
+            f"Unrecognized model in {finetuned_delta_path}. "
            f"Should have a `delta_type` key in the loaded config, or contain one of the following strings "
            f"in its name: {', '.join(LAZY_CONFIG_MAPPING.keys())}"
        )

-### AutoModels below 
+### AutoModels below

 class _LazyAutoMapping(OrderedDict):
    """
@ -323,20 +320,20 @@ class AutoDeltaModel:
            f"using the `{self.__class__.__name__}.from_pretrained(pretrained_model_name_or_path)` or "
            f"`{self.__class__.__name__}.from_config(config)` methods."
        )
-    
+
    @classmethod
    def from_config(cls, config, backbone_model, **kwargs): #-> "DeltaBase":
        r"""Automatically instantiates a delta model based on the :obj:`config`. The delta model correspond to the delta
-        :obj:`config` will be loaded and initialized using the arguments in :obj:`config`. 
+        :obj:`config` will be loaded and initialized using the arguments in :obj:`config`.

        .. note::
-            Only using :meth:`from_config` method will not load the finetuned weight file (e.g., pytorch_model.bin). 
-            Please use from_finetuned directly. 
+            Only using :meth:`from_config` method will not load the finetuned weight file (e.g., pytorch_model.bin).
+            Please use from_finetuned directly.

        Args:
            config (:obj:`BaseDeltaConfig`):
            backbone_model (:obj:`nn.Module`):
-    
+
        Examples:

        .. code-block:: python
@ -355,53 +352,54 @@ class AutoDeltaModel:
        )

    @classmethod
-    def from_finetuned(cls, finetuned_model_name_or_path, backbone_model, *model_args, **kwargs):
-        r""" Automatically instantiated a delta model and load the finetuned checkpoints based on the 
-        :obj:`finetuned_model_name_or_path`, which can either be a string pointing to a local path or a url pointint to 
-        the delta hub. It will check the hash after loading the delta model to see whether the correct backbone and 
-        delta checkpoint are used. 
+    def from_finetuned(cls, finetuned_delta_path, backbone_model, *model_args, **kwargs):
+        r""" Automatically instantiates a delta model and loads the finetuned checkpoints based on the
+        :obj:`finetuned_delta_path`, which can either be a string pointing to a local path or a url pointing to
+        the delta hub. It will check the hash after loading the delta model to see whether the correct backbone and
+        delta checkpoint are used.

        Args:
-            finetuned_model_name_or_path (:obj:`str` or :obj:`os.PathLike`, *optional*): 
+            finetuned_delta_path (:obj:`str` or :obj:`os.PathLike`, *optional*):
                Can be either:

                - A string, the *model id* of a finetuned delta model configuration hosted inside a model repo on
                  huggingface.co. Valid model ids can be located at the root-level, like ``Davin/lora``, or
                  namespaced under a user or organization name, like ``DeltaHub/lora_t5-base_mrpc``.
                - A path to a *directory* containing a configuration file saved using the
-                  :py:meth:`DeltaBase.save_finetuned` method, 
+                  :py:meth:`DeltaBase.save_finetuned` method,
                  e.g., ``./my_model_directory/``.
                - A path or url to a saved configuration JSON *file*, e.g.,
                  ``./my_model_directory/configuration.json``.
-                The last two option are not tested but inherited from huggingface. 
+                The last two options are not tested but inherited from huggingface.

            backbone_model (:obj:`nn.Module`): The backbone model to be modified.
            model_args: Other argument for initialize the model.
+            kwargs: Other kwargs that will be passed into DeltaBase.from_finetuned. A ``delta_config`` entry,
+                if it is a :obj:`BaseDeltaConfig`, is used directly instead of loading the config from
+                :obj:`finetuned_delta_path`.
+
+        Raises:
+            ValueError: If the type of the delta config is not a key of the delta model mapping.

        Example:
-        
+
        .. code-block:: python

            delta_model = AutoDeltaModel.from_finetuned("DeltaHub/lora_t5-base-mrpc", backbone_model)

        """
-        config = kwargs.pop("config", None)
+        delta_config = kwargs.pop("delta_config", None)

-        if not isinstance(config, BaseDeltaConfig):
-            config, kwargs = AutoDeltaConfig.from_finetuned(
-                finetuned_model_name_or_path, return_unused_kwargs=True, **kwargs
+        # Load the config from the checkpoint location unless the caller supplied a ready-made one.
+        if not isinstance(delta_config, BaseDeltaConfig):
+            delta_config, kwargs = AutoDeltaConfig.from_finetuned(
+                finetuned_delta_path, return_unused_kwargs=True, **kwargs
            )
-        if type(config) in cls._delta_model_mapping.keys():
-            model_class = cls._delta_model_mapping[type(config)]
-            return model_class.from_finetuned(finetuned_model_name_or_path, backbone_model, *model_args, **kwargs)
+        if type(delta_config) in cls._delta_model_mapping.keys():
+            model_class = cls._delta_model_mapping[type(delta_config)]
+            return model_class.from_finetuned(finetuned_delta_path, backbone_model, *model_args, delta_config=delta_config,  **kwargs)
+        # The message must use the names that exist in this scope: the local is ``delta_config`` and the
+        # mapping consulted above is ``cls._delta_model_mapping``.
        raise ValueError(
-            f"Unrecognized configuration class {config.__class__} for this kind of AutoModel: {cls.__name__}.\n"
-            f"Model type should be one of {', '.join(c.__name__ for c in cls._model_mapping.keys())}."
+            f"Unrecognized configuration class {delta_config.__class__} for this kind of AutoModel: {cls.__name__}.\n"
+            f"Model type should be one of {', '.join(c.__name__ for c in cls._delta_model_mapping.keys())}."
        )
-        


-    
+
+

 if __name__ == "__main__":

--- a/opendelta/basemodel.py
+++ b/opendelta/basemodel.py
@ -671,21 +671,46 @@ class DeltaBase(nn.Module, SaveLoadMixin):
        if visualization:
            from opendelta import Visualization
            Visualization(module).structure_graph()
+
+        self.get_statistics(module)
        if trainable_ratio:
-            n_trainable = self.num_trainable_parameters(module)
-            n_total = self.num_total_parameters(module)
-            logger.info("Trainable Ratio: {:2f}%".format(n_trainable/n_total*100))
+            logger.info("Trainable Ratio: {:2f}%".format(self.stat['trainable_ratio']*100))
        if delta_ratio:
-            n_delta = self.num_delta_parameters(module)
-            n_total = self.num_total_parameters(module)
-            logger.info("Delta Parameter Ratio: {:2f}%".format(n_delta/n_total*100))
+            logger.info("Delta Parameter Ratio: {:2f}%".format(self.stat['delta_ratio']*100))
        if cuda_memory:
-            cudamem = 0
-            maxcudamem = 0
-            for device_id in range(torch.cuda.device_count()):
-                cudamem += torch.cuda.memory_allocated(f"cuda:{device_id}")/1024**3
-                maxcudamem += torch.cuda.max_memory_allocated(f"cuda:{device_id}")/1024**3
-            logger.info("Static Memory {:.2f} GB, Max Memory {:.2f} GB".format(cudamem, maxcudamem))
+            logger.info("Static Memory {:.2f} GB, Max Memory {:.2f} GB".format(self.stat['cudamem'], self.stat['maxcudamem']))
+
+
+    def get_statistics(self, module=None):
+        r"""Get the statistics of the parameters in the delta modules.
+
+        The result is also stored in ``self.stat``.
+
+        Args:
+            module (:obj:`nn.Module`, *optional*): The module to compute the statistics. Defaults to
+                ``self.backbone_model`` when :obj:`None`.
+
+        Returns:
+            :obj:`dict`: The statistics of the parameters in the delta modules, with the keys
+            ``trainable_ratio``, ``delta_ratio`` (both fractions of the total parameter count),
+            ``cudamem`` and ``maxcudamem`` (allocated and peak allocated memory summed over all
+            visible cuda devices, in bytes divided by 1024**3).
+
+        """
+        if module is None:
+            module = self.backbone_model
+
+        n_trainable = self.num_trainable_parameters(module)
+        n_delta = self.num_delta_parameters(module)
+        # The total is shared by both ratios, so it is computed once.
+        n_total = self.num_total_parameters(module)
+
+        # Sum over every visible device; the loop body does not run when no cuda device is present.
+        cudamem = 0
+        maxcudamem = 0
+        for device_id in range(torch.cuda.device_count()):
+            cudamem += torch.cuda.memory_allocated(f"cuda:{device_id}")/1024**3
+            maxcudamem += torch.cuda.max_memory_allocated(f"cuda:{device_id}")/1024**3
+
+        self.stat = {
+            # A module without parameters yields ratios of 0 instead of a ZeroDivisionError.
+            'trainable_ratio': n_trainable/n_total if n_total else 0.0,
+            'delta_ratio': n_delta/n_total if n_total else 0.0,
+            'cudamem': cudamem,
+            'maxcudamem': maxcudamem,
+        }
+        return self.stat



--- a/Show More
+++ b/Show More