From cd3b981f6160d570db0fa29593d2468bc5813408 Mon Sep 17 00:00:00 2001
From: shengdinghu <shengdinghu@gmail.com>
Date: Fri, 22 Apr 2022 19:30:05 +0800
Subject: [PATCH] update examples

---
 .gitignore                                    |   9 +-
 examples/examples_prompt/README.md            |  49 +-
 .../examples_prompt/configs/gen_albert.py     | 116 +++++
 examples/examples_prompt/configs/gen_beit.py  | 450 ++++++++++++++++
 examples/examples_prompt/configs/gen_bert.py  | 116 +++++
 examples/examples_prompt/configs/gen_gpt.py   | 433 ++++++++++++++++
 .../examples_prompt/configs/gen_roberta.py    | 143 ++++++
 examples/examples_prompt/configs/gen_t5.py    | 444 ++++++++++++++++
 examples/examples_prompt/run.py               | 482 ------------------
 examples/examples_prompt/run.sh               |   7 -
 examples/examples_prompt/run_mlm.sh           |  11 -
 .../examples_seq2seq/configs/config_gen_bs.py | 411 +++++++++++++++
 .../configs/config_gen_bs1.py                 | 411 +++++++++++++++
 .../configs/config_gen_bs64.py                | 411 +++++++++++++++
 .../configs/config_gen_bs8.py                 | 411 +++++++++++++++
 15 files changed, 3355 insertions(+), 549 deletions(-)
 create mode 100644 examples/examples_prompt/configs/gen_albert.py
 create mode 100644 examples/examples_prompt/configs/gen_beit.py
 create mode 100644 examples/examples_prompt/configs/gen_bert.py
 create mode 100644 examples/examples_prompt/configs/gen_gpt.py
 create mode 100644 examples/examples_prompt/configs/gen_roberta.py
 create mode 100644 examples/examples_prompt/configs/gen_t5.py
 delete mode 100644 examples/examples_prompt/run.py
 delete mode 100644 examples/examples_prompt/run.sh
 delete mode 100644 examples/examples_prompt/run_mlm.sh
 create mode 100644 examples/examples_seq2seq/configs/config_gen_bs.py
 create mode 100644 examples/examples_seq2seq/configs/config_gen_bs1.py
 create mode 100644 examples/examples_seq2seq/configs/config_gen_bs64.py
 create mode 100644 examples/examples_seq2seq/configs/config_gen_bs8.py

diff --git a/.gitignore b/.gitignore
index bd8f570..9bb4b23 100644
--- a/.gitignore
+++ b/.gitignore
@@ -17,18 +17,17 @@ _build/
 outputs/
 log.txt
 **/DeltaHub/
+**/sfs_scripts/
 *beans/
-**/examples/*/configs/
-
+**/examples/*/configs/*
 !examples/*/configs/config_gen.py
 **/jupyter_notebook_examples/
 !examples/jupyter_notebook_examples/*.py
-
-
-!**/examples/*/configs/*.py
+!examples/*/configs/*.py
 **/outputs_search/**/*.bin
 **/outputs_search/**/*.pt
 
+
 *.db
 **/nohup.out
 **/examples/examples_bmtrain/BigModels/down_data
diff --git a/examples/examples_prompt/README.md b/examples/examples_prompt/README.md
index 38c5b22..fdb2ded 100644
--- a/examples/examples_prompt/README.md
+++ b/examples/examples_prompt/README.md
@@ -10,55 +10,16 @@ This will add `examples_seq2seq` to the environment path of the python lib.
 
 ## Generating the json configuration file
 
+```shell
+python configs/gen_$BACKBONETYPE.py --job $YOURJOB
+#e.g. python configs/gen_beit.py --job lora_beit-base-patch16-224
 ```
-python config_gen.py --job $job_name
-
-```
-The available job configuration (e.g., `--job lora_t5-base`) can be seen from `config_gen.py`. You can also
+The available job configurations (e.g., `--job lora_beit-base-patch16-224`) are listed in the `configs/gen_*.py` scripts. You can also
 create your only configuration.
 
 
 ## Run the code
 
 ```
-python run_seq2seq.py configs/$job_name/$dataset.json
+CUDA_VISIBLE_DEVICES=1 python src/run.py configs/lora_beit-base-patch16-224/beans.json
 ```
-
-## Possible Errors
-
-1. 
-```
-ValueError: You must login to the Hugging Face hub on this computer by typing `transformers-cli login` and entering your credentials to use `use_auth_token=Tr
-ue`. Alternatively, you can pass your own token as the `use_auth_token` argument.
-```
-- Solution 1: Please register an account on [HuggingFace](https://huggingface.co/) 
-Then run transformers-cli login on your command line to enter the username and password.
-
-- Solution 2: Disable push_to_hub by modifying in the config.json : "push_to_hub": False
-
-2. 
-```
-OSError: Looks like you do not have git-lfs installed, please install. You can install from https://git-lfs.github.com/. Then run `git lfs install` (you only have to do this once).
-```
-
-- Solution 1:
-```
-wget -P ~ https://github.com/git-lfs/git-lfs/releases/download/v3.0.2/git-lfs-linux-amd64-v3.0.2.tar.gz
-cd ~
-tar -xvzf git-lfs-linux-amd64-v3.0.2.tar.gz
-export PATH=~:$PATH
-git-lfs install
-```
-
-- Solution 2: Disable push_to_hub by modifying in the config.json : "push_to_hub": False
-
-
-3. dataset connection error
-
-Solution 1: open a python console, running the error command again, may not be useful
-
-Solution 2: download the dataset by yourself on a internect connected machine, saved to disk and transfer to your server, at last load_from_disk.
-
-
-## Link to the original training scripts
-This example repo is based on the [compacter training scripts](https://github.com/rabeehk/compacter), with compacter-related lines removed. Thanks to the authors of the original repo. In addition, in private correspondence with the authors, they shared the codes to create the json configs. Thanks again for their efforts. 
diff --git a/examples/examples_prompt/configs/gen_albert.py b/examples/examples_prompt/configs/gen_albert.py
new file mode 100644
index 0000000..be9af6d
--- /dev/null
+++ b/examples/examples_prompt/configs/gen_albert.py
@@ -0,0 +1,116 @@
+import collections
+import copy
+
+PATHBASE="/mnt/sfs_turbo/hsd/plm_cache/"
+PATHBASE="/home/hushengding/plm_cache/"
+
+AllConfigs = {}
+
+BaseConfigs = {}
+
+
+#### ALBERT ######
+BaseConfigs['albert-xlarge-v2'] = {
+                ("job_name", "task_name", "eval_dataset_name", "test_dataset_name", "num_train_epochs",
+                "max_source_length",
+                "per_device_train_batch_size", "per_device_eval_batch_size", "warmup_steps","save_steps", "eval_steps"): zip(
+                    ["superglue-boolq", "superglue-cb", "superglue-copa", "superglue-wic", "superglue-multirc", "superglue-record",
+                    "superglue-wsc.fixed", "mrpc", "cola", "sst2", "qnli", "rte",  "mnli", "qqp", "stsb"],
+                    ["superglue-boolq", "superglue-cb", "superglue-copa", "superglue-wic", "superglue-multirc", "superglue-record", "superglue-wsc.fixed", "mrpc", "cola", "sst2", "qnli", "rte",  "mnli", "qqp", "stsb"],
+                    ["superglue-boolq", "superglue-cb", "superglue-copa", "superglue-wic", "superglue-multirc", "superglue-record", "superglue-wsc.fixed", "mrpc", "cola", "sst2", "qnli", "rte",  "mnli", "qqp", "stsb"],
+                    ["superglue-boolq", "superglue-cb", "superglue-copa", "superglue-wic", "superglue-multirc", "superglue-record", "superglue-wsc.fixed", "mrpc", "cola", "sst2", "qnli", "rte", "mnli", "qqp", "stsb"],
+                    [ 20,  20,  40,  20,   3,   3,  20,  20,  20,   3,   3,  20,   3,   3,  20],
+                    [256, 256, 256, 256, 256, 512, 256, 128, 128, 128, 128, 128, 128, 128, 128],
+                    [ 32,  32,  32,  32,  32,  16,  32] + [32] * 8,
+                    [ 32,  32,  32,  32,  32,  16,  32] + [32] * 8,
+                    [0] *7 +[0] *8,
+                    [200, 100, 50, 100, 200, 200, 100, 200, 100, 200, 200, 100, 200, 200, 100],
+                    [200, 100, 50, 100, 200, 200, 100, 200, 100, 200, 200, 100, 200, 200, 100],
+                ),
+                "do_train": True,
+                "do_eval": True,
+                "do_test": True,
+
+                "model_name_or_path": f"{PATHBASE}albert-xlarge-v2",
+                "tokenizer_name": f"{PATHBASE}albert-xlarge-v2",
+                "save_total_limit": 1,
+                # For glue datasets.
+                "is_seq2seq": False,
+                "split_validation_test": True,
+                "seed": 42,
+                "dataset_config_name": ["en"],
+                "eval_dataset_config_name": ["en"],
+                "test_dataset_config_name": ["en"],
+                # other configurations.
+                "predict_with_generate": False,
+                # To evaluate during training.
+                "load_best_model_at_end": True,
+                "metric_for_best_model": "average_metrics",
+                "greater_is_better": True,
+                "evaluation_strategy": "steps",
+                "overwrite_output_dir": True,
+                "push_to_hub": False,
+                "push_to_delta_center": True,
+                "save_strategy": "steps"
+            }
+
+AllConfigs['prefix_albert-xlarge-v2'] = copy.deepcopy(BaseConfigs['albert-xlarge-v2'])
+AllConfigs['prefix_albert-xlarge-v2'].update({
+                                "delta_type": "prefix",
+                                "learning_rate": 3e-4,
+                                "unfrozen_modules": [
+                                    "deltas",
+                                ],
+                                "output_dir": "outputs/prefix/albert-xlarge-v2/",
+                            })
+
+AllConfigs['soft_prompt_albert-xlarge-v2'] = copy.deepcopy(BaseConfigs['albert-xlarge-v2'])
+AllConfigs['soft_prompt_albert-xlarge-v2'].update({
+                                "delta_type": "soft_prompt",
+                                "learning_rate": 3e-4,
+                                "unfrozen_modules": [
+                                    "deltas",
+                                ],
+                                "output_dir": "outputs/soft_prompt/albert-xlarge-v2/",
+                            })
+
+if __name__ == "__main__":
+    import argparse
+    import json
+    import os
+    parser = argparse.ArgumentParser("Parser to generate configuration")
+    parser.add_argument("--job", type=str)
+    args = parser.parse_args()
+
+    config = AllConfigs[args.job]
+
+    Cartesian_product = []
+    for key in config:
+        if isinstance(key, tuple):
+            Cartesian_product.append(key)
+    all_config_jsons = {}
+    for key_tuple in Cartesian_product:
+        for zipped in config[key_tuple]:
+            job_name = zipped[0]
+            all_config_jsons[job_name] = {}
+            for key_name, zipped_elem in zip(key_tuple, zipped):
+                if key_name != 'job_name':
+                    all_config_jsons[job_name][key_name] = zipped_elem
+    for key in config:
+        if not isinstance(key, tuple):
+            for job_name in all_config_jsons:
+                if key == "output_dir":
+                    all_config_jsons[job_name][key] = config[key] + job_name
+                else:
+                    all_config_jsons[job_name][key] = config[key]
+
+
+    if not os.path.exists(f"configs/{args.job}/"):
+        os.mkdir(f"configs/{args.job}/")
+
+    for job_name in all_config_jsons:
+        with open(f"configs/{args.job}/{job_name}.json", 'w') as fout:
+            json.dump(all_config_jsons[job_name], fout, indent=4,sort_keys=True)
+
+
+
diff --git a/examples/examples_prompt/configs/gen_beit.py b/examples/examples_prompt/configs/gen_beit.py
new file mode 100644
index 0000000..9b61108
--- /dev/null
+++ b/examples/examples_prompt/configs/gen_beit.py
@@ -0,0 +1,450 @@
+import collections
+import copy
+
+PATHBASE="/mnt/sfs_turbo/hsd/plm_cache/"
+PATHBASE="/home/hushengding/plm_cache/"
+
+AllConfigs = {}
+
+BaseConfigs = {}
+BaseConfigs['beit-base-patch16-224'] = {
+                ("job_name", "task_name", "eval_dataset_name", "test_dataset_name", "num_train_epochs",
+                "max_source_length",
+                "per_device_train_batch_size", "per_device_eval_batch_size", "warmup_steps","save_steps", "eval_steps", "num_classes"): zip(
+                    ["beans"],
+                    ["beans"], #"superglue-cb", "superglue-copa", "superglue-wic", "superglue-multirc", "superglue-record", "superglue-wsc.fixed", "mrpc", "cola", "sst2", "qnli", "rte",  "mnli", "qqp", "stsb"],
+                    ["beans"], #"superglue-cb", "superglue-copa", "superglue-wic", "superglue-multirc", "superglue-record", "superglue-wsc.fixed", "mrpc", "cola", "sst2", "qnli", "rte",  "mnli", "qqp", "stsb"],
+                    ["beans"], #"superglue-cb", "superglue-copa", "superglue-wic", "superglue-multirc", "superglue-record", "superglue-wsc.fixed", "mrpc", "cola", "sst2", "qnli", "rte", "mnli", "qqp", "stsb"],
+                    [ 20],
+                    [256],
+                    [ 32],
+                    [ 32],#,  32,  32,  32,  32,  16,  32] + [32] * 8,
+                    [0], # *7 +[0] *8,
+                    [200],# 100, 50, 100, 200, 200, 100, 200, 100, 200, 200, 100, 200, 200, 100],
+                    [200],#, 100, 50, 100, 200, 200, 100, 200, 100, 200, 200, 100, 200, 200, 100],
+                    [ 3],
+                ),
+                "do_train": True,
+                "do_eval": True,
+                "do_test": True,
+
+                "model_name_or_path": f"{PATHBASE}beit-base-patch16-224",
+                "tokenizer_name": f"{PATHBASE}beit-base-patch16-224",
+                "save_total_limit": 1,
+                # For glue datasets.
+                "split_validation_test": True,
+                "seed": 42,
+                "dataset_config_name": ["en"],
+                "eval_dataset_config_name": ["en"],
+                "test_dataset_config_name": ["en"],
+                # other configurations.
+                "predict_with_generate": False,
+                # To evaluate during training.
+                "load_best_model_at_end": True,
+                "metric_for_best_model": "average_metrics",
+                "greater_is_better": True,
+                "evaluation_strategy": "steps",
+
+                "overwrite_output_dir": True,
+                "push_to_hub": False,
+                "push_to_delta_center": True,
+                "save_strategy": "steps",
+                "datasets_load_from_disk":False,
+            }
+
+AllConfigs['bitfit_beit-base-patch16-224'] = copy.deepcopy(BaseConfigs['beit-base-patch16-224'])
+AllConfigs['bitfit_beit-base-patch16-224'].update({
+                "delta_type": "bitfit",
+                "learning_rate": 3e-4,
+                "output_dir": "outputs/bitfit/beit-base-patch16-224/",
+            })
+
+AllConfigs['adapter_beit-base-patch16-224'] = copy.deepcopy(BaseConfigs['beit-base-patch16-224'])
+AllConfigs['adapter_beit-base-patch16-224'].update({
+                                "delta_type": "adapter",
+                                "learning_rate": 3e-4,
+                                "unfrozen_modules": [
+                                    "deltas",
+                                    "layer_norm",
+                                    "final_layer_norm"
+                                ],
+                                "bottleneck_dim":24,
+                                "output_dir": "outputs/adapter/beit-base-patch16-224/",
+                            })
+
+AllConfigs['lora_beit-base-patch16-224'] = copy.deepcopy(BaseConfigs['beit-base-patch16-224'])
+AllConfigs['lora_beit-base-patch16-224'].update({
+                                "delta_type": "lora",
+                                "learning_rate": 3e-4,
+                                "unfrozen_modules": [
+                                    "deltas",
+                                    "layernorm_after",
+                                    "classifier"
+                                ],
+                                "modified_modules":[
+                                    "query",
+                                    "value",
+                                ],
+                                "lora_r": 8,
+                                "output_dir": "outputs/lora/beit-base-patch16-224/",
+                            })
+
+AllConfigs['compacter_beit-base-patch16-224'] = copy.deepcopy(BaseConfigs['beit-base-patch16-224'])
+AllConfigs['compacter_beit-base-patch16-224'].update({
+                                "delta_type": "compacter",
+                                "learning_rate": 3e-3,
+                                "unfrozen_modules": [
+                                    "deltas",
+                                    "layer_norm",
+                                    "final_layer_norm"
+                                ],
+                                "output_dir": "outputs/compacter/beit-base-patch16-224/",
+                                "non_linearity": "gelu_new",
+
+                                #Compacter.
+                                "hypercomplex_division": 4,
+                                "hypercomplex_adapters": True,
+                                "hypercomplex_nonlinearity": "glorot-uniform",
+                                # gradient clip and clamp
+                                "gradient_clip": False,
+                                "phm_clamp": False,
+                                "normalize_phm_weight": False,
+                                "learn_phm": True,
+                                # shared one side
+                                "factorized_phm": True,
+                                "shared_phm_rule": False,
+                                "factorized_phm_rule": False,
+                                "phm_c_init": "normal",
+                                "phm_init_range": 0.0001,
+                                "use_bias_down_sampler": True,
+                                "use_bias_up_sampler": True,
+                            })
+
+AllConfigs['compacter++_beit-base-patch16-224'] = copy.deepcopy(BaseConfigs['beit-base-patch16-224'])
+AllConfigs['compacter++_beit-base-patch16-224'].update({
+                                "delta_type": "compacter",
+                                "learning_rate": 3e-3,
+                                "do_train": True,
+                                "do_eval": True,
+                                "do_test": True,
+                                "modified_modules": [
+                                    "DenseReluDense"
+                                ],
+                                "unfrozen_modules": [
+                                    "deltas",
+                                    "layer_norm",
+                                    "final_layer_norm"
+                                ],
+                                "output_dir": "outputs/compacter++/beit-base-patch16-224/",
+                                "non_linearity": "gelu_new",
+
+                                #Compacter.
+                                "hypercomplex_division": 4,
+                                "hypercomplex_adapters": True,
+                                "hypercomplex_nonlinearity": "glorot-uniform",
+                                # gradient clip and clamp
+                                "gradient_clip": False,
+                                "phm_clamp": False,
+                                "normalize_phm_weight": False,
+                                "learn_phm": True,
+                                # shared one side
+                                "factorized_phm": True,
+                                "shared_phm_rule": False,
+                                "factorized_phm_rule": False,
+                                "phm_c_init": "normal",
+                                "phm_init_range": 0.0001,
+                                "use_bias_down_sampler": True,
+                                "use_bias_up_sampler": True,
+                            })
+
+
+AllConfigs['low_rank_adapter_beit-base-patch16-224'] = copy.deepcopy(BaseConfigs['beit-base-patch16-224'])
+AllConfigs['low_rank_adapter_beit-base-patch16-224'].update({
+                                "delta_type": "low_rank_adapter",
+                                "learning_rate": 3e-4,
+                                "unfrozen_modules": [
+                                    "deltas",
+                                    "layer_norm",
+                                    "final_layer_norm"
+                                ],
+                                "output_dir": "outputs/low_rank_adapter/beit-base-patch16-224/",
+                                "non_linearity": "gelu_new",
+                                "low_rank_w_init": "glorot-uniform",
+                                "low_rank_rank": 1,
+                            })
+
+
+AllConfigs['soft_prompt_beit-base-patch16-224'] = copy.deepcopy(BaseConfigs['beit-base-patch16-224'])
+AllConfigs['soft_prompt_beit-base-patch16-224'].update({
+                                "delta_type": "soft_prompt",
+                                "learning_rate": 3e-2,
+                                "soft_token_num":100,
+                                "token_init": False,
+                                "unfrozen_modules": [
+                                    "deltas",
+                                ],
+                                "output_dir": "outputs/soft_prompt/beit-base-patch16-224/",
+                            })
+
+AllConfigs['prefix_beit-base-patch16-224'] = copy.deepcopy(BaseConfigs['beit-base-patch16-224'])
+AllConfigs['prefix_beit-base-patch16-224'].update({
+                                "delta_type": "prefix",
+                                "learning_rate": 3e-4,
+                                "unfrozen_modules": [
+                                    "deltas",
+                                ],
+                                "output_dir": "outputs/prefix/beit-base-patch16-224/",
+                            })
+
+AllConfigs['soft_prompt_beit-base-patch16-224'] = copy.deepcopy(BaseConfigs['beit-base-patch16-224'])
+AllConfigs['soft_prompt_beit-base-patch16-224'].update({
+                                "delta_type": "soft_prompt",
+                                "learning_rate": 3e-4,
+                                "unfrozen_modules": [
+                                    "deltas",
+                                ],
+                                "output_dir": "outputs/soft_prompt/beit-base-patch16-224/",
+                            })
+#### T5 ######
+BaseConfigs['t5-small'] = {
+                ("job_name", "task_name", "eval_dataset_name", "test_dataset_name", "num_train_epochs",
+                "max_source_length",
+                "per_device_train_batch_size", "per_device_eval_batch_size", "warmup_steps","save_steps", "eval_steps"): zip(
+                    ["superglue-boolq", "superglue-cb", "superglue-copa", "superglue-wic", "superglue-multirc", "superglue-record",
+                    "superglue-wsc.fixed", "mrpc", "cola", "sst2", "qnli", "rte",  "mnli", "qqp", "stsb"],
+                    ["superglue-boolq", "superglue-cb", "superglue-copa", "superglue-wic", "superglue-multirc", "superglue-record", "superglue-wsc.fixed", "mrpc", "cola", "sst2", "qnli", "rte",  "mnli", "qqp", "stsb"],
+                    ["superglue-boolq", "superglue-cb", "superglue-copa", "superglue-wic", "superglue-multirc", "superglue-record", "superglue-wsc.fixed", "mrpc", "cola", "sst2", "qnli", "rte",  "mnli", "qqp", "stsb"],
+                    ["superglue-boolq", "superglue-cb", "superglue-copa", "superglue-wic", "superglue-multirc", "superglue-record", "superglue-wsc.fixed", "mrpc", "cola", "sst2", "qnli", "rte", "mnli", "qqp", "stsb"],
+                    [ 20,  20,  40,  20,   3,   3,  20,  20,  20,   3,   3,  20,   3,   3,  20],
+                    [256, 256, 256, 256, 256, 512, 256, 128, 128, 128, 128, 128, 128, 128, 128],
+                    [ 32,  32,  32,  32,  32,  16,  32] + [32] * 8,
+                    [ 32,  32,  32,  32,  32,  16,  32] + [32] * 8,
+                    [0] *7 +[0] *8,
+                    [200, 100, 50, 100, 200, 200, 100, 200, 100, 200, 200, 100, 200, 200, 100],
+                    [200, 100, 50, 100, 200, 200, 100, 200, 100, 200, 200, 100, 200, 200, 100],
+                ),
+                "do_train": True,
+                "do_eval": True,
+                "do_test": True,
+
+                "model_name_or_path": f"{PATHBASE}t5-small",
+                "tokenizer_name": f"{PATHBASE}t5-small",
+                "save_total_limit": 1,
+                # For glue datasets.
+                "split_validation_test": True,
+                "seed": 42,
+                "dataset_config_name": ["en"],
+                "eval_dataset_config_name": ["en"],
+                "test_dataset_config_name": ["en"],
+                # other configurations.
+                "predict_with_generate": True,
+                # To evaluate during training.
+                "load_best_model_at_end": True,
+                "metric_for_best_model": "average_metrics",
+                "greater_is_better": True,
+                "evaluation_strategy": "steps",
+                "overwrite_output_dir": True,
+                "push_to_hub": False,
+                "push_to_delta_center": True,
+                "save_strategy": "steps"
+            }
+
+AllConfigs['prefix_t5-small'] = copy.deepcopy(BaseConfigs['t5-small'])
+AllConfigs['prefix_t5-small'].update({
+                                "delta_type": "prefix",
+                                "learning_rate": 3e-4,
+                                "unfrozen_modules": [
+                                    "deltas",
+                                ],
+                                "output_dir": "outputs/prefix/t5-small/",
+                            })
+
+
+
+
+#### ROBERTA######
+BaseConfigs['roberta-base'] = {
+                ("job_name", "task_name", "eval_dataset_name", "test_dataset_name", "num_train_epochs",
+                "max_source_length",
+                "per_device_train_batch_size", "per_device_eval_batch_size", "warmup_steps","save_steps", "eval_steps"): zip(
+                    ["superglue-boolq", "superglue-cb", "superglue-copa", "superglue-wic", "superglue-multirc", "superglue-record",
+                    "superglue-wsc.fixed", "mrpc", "cola", "sst2", "qnli", "rte",  "mnli", "qqp", "stsb"],
+                    ["superglue-boolq", "superglue-cb", "superglue-copa", "superglue-wic", "superglue-multirc", "superglue-record", "superglue-wsc.fixed", "mrpc", "cola", "sst2", "qnli", "rte",  "mnli", "qqp", "stsb"],
+                    ["superglue-boolq", "superglue-cb", "superglue-copa", "superglue-wic", "superglue-multirc", "superglue-record", "superglue-wsc.fixed", "mrpc", "cola", "sst2", "qnli", "rte",  "mnli", "qqp", "stsb"],
+                    ["superglue-boolq", "superglue-cb", "superglue-copa", "superglue-wic", "superglue-multirc", "superglue-record", "superglue-wsc.fixed", "mrpc", "cola", "sst2", "qnli", "rte", "mnli", "qqp", "stsb"],
+                    [ 20,  20,  40,  20,   3,   3,  20,  20,  20,   3,   3,  20,   3,   3,  20],
+                    [256, 256, 256, 256, 256, 512, 256, 128, 128, 128, 128, 128, 128, 128, 128],
+                    [ 32,  32,  32,  32,  32,  16,  32] + [32] * 8,
+                    [ 32,  32,  32,  32,  32,  16,  32] + [32] * 8,
+                    [0] *7 +[0] *8,
+                    [200, 100, 50, 100, 200, 200, 100, 200, 100, 200, 200, 100, 200, 200, 100],
+                    [200, 100, 50, 100, 200, 200, 100, 200, 100, 200, 200, 100, 200, 200, 100],
+                ),
+                "do_train": True,
+                "do_eval": True,
+                "do_test": True,
+
+                "model_name_or_path": f"{PATHBASE}roberta-base",
+                "tokenizer_name": f"{PATHBASE}roberta-base",
+                "save_total_limit": 1,
+                # For glue datasets.
+                "is_seq2seq": False,
+                "split_validation_test": True,
+                "seed": 42,
+                "dataset_config_name": ["en"],
+                "eval_dataset_config_name": ["en"],
+                "test_dataset_config_name": ["en"],
+                # other configurations.
+                "predict_with_generate": False,
+                # To evaluate during training.
+                "load_best_model_at_end": True,
+                "metric_for_best_model": "average_metrics",
+                "greater_is_better": True,
+                "evaluation_strategy": "steps",
+                "overwrite_output_dir": True,
+                "push_to_hub": True,
+                "save_strategy": "steps"
+            }
+
+
+
+AllConfigs['bitfit_roberta-base'] = copy.deepcopy(BaseConfigs['roberta-base'])
+AllConfigs['bitfit_roberta-base'].update({
+                "delta_type": "bitfit",
+                "learning_rate": 1e-3,
+                "output_dir": "outputs/bitfit/roberta-base/",
+            })
+
+AllConfigs['none_roberta-base'] = copy.deepcopy(BaseConfigs['roberta-base'])
+AllConfigs['none_roberta-base'].update({
+                "delta_type": "none",
+                "learning_rate": 1e-5,
+                "output_dir": "outputs/none/roberta-base/",
+            })
+
+
+AllConfigs['lora_roberta-base'] = copy.deepcopy(BaseConfigs['roberta-base'])
+AllConfigs['lora_roberta-base'].update({
+                "delta_type": "lora",
+                "learning_rate": 1e-3,
+                "output_dir": "outputs/lora/roberta-base/",
+            })
+
+AllConfigs['adapter_roberta-base'] = copy.deepcopy(BaseConfigs['roberta-base'])
+AllConfigs['adapter_roberta-base'].update({
+                "delta_type": "adapter",
+                "learning_rate": 1e-3,
+                "output_dir": "outputs/adapter/roberta-base/",
+            })
+
+AllConfigs['low_rank_adapter_roberta-base'] = copy.deepcopy(BaseConfigs['roberta-base'])
+AllConfigs['low_rank_adapter_roberta-base'].update({
+                "delta_type": "low_rank_adapter",
+                "learning_rate": 1e-3,
+                "output_dir": "outputs/low_rank_adapter/roberta-base/",
+            })
+
+#### BERT ######
+BaseConfigs['bert-base-cased'] = {
+                ("job_name", "task_name", "eval_dataset_name", "test_dataset_name", "num_train_epochs",
+                "max_source_length",
+                "per_device_train_batch_size", "per_device_eval_batch_size", "warmup_steps","save_steps", "eval_steps"): zip(
+                    ["superglue-boolq", "superglue-cb", "superglue-copa", "superglue-wic", "superglue-multirc", "superglue-record",
+                    "superglue-wsc.fixed", "mrpc", "cola", "sst2", "qnli", "rte",  "mnli", "qqp", "stsb"],
+                    ["superglue-boolq", "superglue-cb", "superglue-copa", "superglue-wic", "superglue-multirc", "superglue-record", "superglue-wsc.fixed", "mrpc", "cola", "sst2", "qnli", "rte",  "mnli", "qqp", "stsb"],
+                    ["superglue-boolq", "superglue-cb", "superglue-copa", "superglue-wic", "superglue-multirc", "superglue-record", "superglue-wsc.fixed", "mrpc", "cola", "sst2", "qnli", "rte",  "mnli", "qqp", "stsb"],
+                    ["superglue-boolq", "superglue-cb", "superglue-copa", "superglue-wic", "superglue-multirc", "superglue-record", "superglue-wsc.fixed", "mrpc", "cola", "sst2", "qnli", "rte", "mnli", "qqp", "stsb"],
+                    [ 20,  20,  40,  20,   3,   3,  20,  20,  20,   3,   3,  20,   3,   3,  20],
+                    [256, 256, 256, 256, 256, 512, 256, 128, 128, 128, 128, 128, 128, 128, 128],
+                    [ 32,  32,  32,  32,  32,  16,  32] + [32] * 8,
+                    [ 32,  32,  32,  32,  32,  16,  32] + [32] * 8,
+                    [0] *7 +[0] *8,
+                    [200, 100, 50, 100, 200, 200, 100, 200, 100, 200, 200, 100, 200, 200, 100],
+                    [200, 100, 50, 100, 200, 200, 100, 200, 100, 200, 200, 100, 200, 200, 100],
+                ),
+                "do_train": True,
+                "do_eval": True,
+                "do_test": True,
+
+                "model_name_or_path": f"{PATHBASE}bert-base-cased",
+                "tokenizer_name": f"{PATHBASE}bert-base-cased",
+                "save_total_limit": 1,
+                # For glue datasets.
+                "is_seq2seq": False,
+                "split_validation_test": True,
+                "seed": 42,
+                "dataset_config_name": ["en"],
+                "eval_dataset_config_name": ["en"],
+                "test_dataset_config_name": ["en"],
+                # other configurations.
+                "predict_with_generate": False,
+                # To evaluate during training.
+                "load_best_model_at_end": True,
+                "metric_for_best_model": "average_metrics",
+                "greater_is_better": True,
+                "evaluation_strategy": "steps",
+                "overwrite_output_dir": True,
+                "push_to_hub": True,
+                "save_strategy": "steps"
+            }
+
+AllConfigs['prefix_bert-base-cased'] = copy.deepcopy(BaseConfigs['bert-base-cased'])
+AllConfigs['prefix_bert-base-cased'].update({
+                                "delta_type": "prefix",
+                                "learning_rate": 3e-4,
+                                "unfrozen_modules": [
+                                    "deltas",
+                                ],
+                                "output_dir": "outputs/prefix/bert-base-cased/",
+                            })
+
+AllConfigs['soft_prompt_bert-base-cased'] = copy.deepcopy(BaseConfigs['bert-base-cased'])
+AllConfigs['soft_prompt_bert-base-cased'].update({
+                                "delta_type": "soft_prompt",
+                                "learning_rate": 3e-4,
+                                "unfrozen_modules": [
+                                    "deltas",
+                                ],
+                                "output_dir": "outputs/soft_prompt/bert-base-cased/",
+                            })
+
+if __name__ == "__main__":
+    import argparse
+    import json
+    import os
+    parser = argparse.ArgumentParser("Parser to generate configuration")
+    parser.add_argument("--job", type=str)
+    args = parser.parse_args()
+
+    config = AllConfigs[args.job]
+
+    Cartesian_product = []
+    for key in config:
+        if isinstance(key, tuple):
+            Cartesian_product.append(key)
+    all_config_jsons = {}
+    for key_tuple in Cartesian_product:
+        for zipped in config[key_tuple]:
+            job_name = zipped[0]
+            all_config_jsons[job_name] = {}
+            for key_name, zipped_elem in zip(key_tuple, zipped):
+                if key_name != 'job_name':
+                    all_config_jsons[job_name][key_name] = zipped_elem
+    for key in config:
+        if not isinstance(key, tuple):
+            for job_name in all_config_jsons:
+                if key == "output_dir":
+                    all_config_jsons[job_name][key] = config[key] + job_name
+                else:
+                    all_config_jsons[job_name][key] = config[key]
+
+
+    if not os.path.exists(f"configs/{args.job}/"):
+        os.mkdir(f"configs/{args.job}/")
+
+    for job_name in all_config_jsons:
+        with open(f"configs/{args.job}/{job_name}.json", 'w') as fout:
+            json.dump(all_config_jsons[job_name], fout, indent=4,sort_keys=True)
+
+
+
diff --git a/examples/examples_prompt/configs/gen_bert.py b/examples/examples_prompt/configs/gen_bert.py
new file mode 100644
index 0000000..2fbaba8
--- /dev/null
+++ b/examples/examples_prompt/configs/gen_bert.py
@@ -0,0 +1,116 @@
+import collections
+import copy
+
+PATHBASE="/mnt/sfs_turbo/hsd/plm_cache/"  # overwritten by the next assignment before any use
+PATHBASE="/home/hushengding/plm_cache/"  # effective value of PATHBASE
+
+AllConfigs = {}
+
+BaseConfigs = {}
+
+
+#### BERT######
+BaseConfigs['bert-base-cased'] = {
+                ("job_name", "task_name", "eval_dataset_name", "test_dataset_name", "num_train_epochs",
+                "max_source_length",
+                "per_device_train_batch_size", "per_device_eval_batch_size", "warmup_steps","save_steps", "eval_steps"): zip(
+                    ["superglue-boolq", "superglue-cb", "superglue-copa", "superglue-wic", "superglue-multirc", "superglue-record",
+                    "superglue-wsc.fixed", "mrpc", "cola", "sst2", "qnli", "rte",  "mnli", "qqp", "stsb"],
+                    ["superglue-boolq", "superglue-cb", "superglue-copa", "superglue-wic", "superglue-multirc", "superglue-record", "superglue-wsc.fixed", "mrpc", "cola", "sst2", "qnli", "rte",  "mnli", "qqp", "stsb"],
+                    ["superglue-boolq", "superglue-cb", "superglue-copa", "superglue-wic", "superglue-multirc", "superglue-record", "superglue-wsc.fixed", "mrpc", "cola", "sst2", "qnli", "rte",  "mnli", "qqp", "stsb"],
+                    ["superglue-boolq", "superglue-cb", "superglue-copa", "superglue-wic", "superglue-multirc", "superglue-record", "superglue-wsc.fixed", "mrpc", "cola", "sst2", "qnli", "rte", "mnli", "qqp", "stsb"],
+                    [ 20,  20,  40,  20,   3,   3,  20,  20,  20,   3,   3,  20,   3,   3,  20],
+                    [256, 256, 256, 256, 256, 512, 256, 128, 128, 128, 128, 128, 128, 128, 128],
+                    [ 32,  32,  32,  32,  32,  16,  32] + [32] * 8,
+                    [ 32,  32,  32,  32,  32,  16,  32] + [32] * 8,
+                    [0] *7 +[0] *8,
+                    [200, 100, 50, 100, 200, 200, 100, 200, 100, 200, 200, 100, 200, 200, 100],
+                    [200, 100, 50, 100, 200, 200, 100, 200, 100, 200, 200, 100, 200, 200, 100],
+                ),
+                "do_train": True,
+                "do_eval": True,
+                "do_test": True,
+
+                "model_name_or_path": f"{PATHBASE}bert-base-cased",
+                "tokenizer_name": f"{PATHBASE}bert-base-cased",
+                "save_total_limit": 1,
+                # For glue datasets.
+                "is_seq2seq": False,
+                "split_validation_test": True,
+                "seed": 42,
+                "dataset_config_name": ["en"],
+                "eval_dataset_config_name": ["en"],
+                "test_dataset_config_name": ["en"],
+                # other configurations.
+                "predict_with_generate": False,
+                # To evaluate during training.
+                "load_best_model_at_end": True,
+                "metric_for_best_model": "average_metrics",
+                "greater_is_better": True,
+                "evaluation_strategy": "steps",
+                "overwrite_output_dir": True,
+                "push_to_hub": False,
+                "push_to_delta_center": True,
+                "save_strategy": "steps"
+            }
+
+AllConfigs['prefix_bert-base-cased'] = copy.deepcopy(BaseConfigs['bert-base-cased'])
+AllConfigs['prefix_bert-base-cased'].update({
+                                "delta_type": "prefix",
+                                "learning_rate": 3e-4,
+                                "unfrozen_modules": [
+                                    "deltas",
+                                ],
+                                "output_dir": "outputs/prefix/bert-base-cased/",
+                            })
+
+AllConfigs['soft_prompt_bert-base-cased'] = copy.deepcopy(BaseConfigs['bert-base-cased'])
+AllConfigs['soft_prompt_bert-base-cased'].update({
+                                "delta_type": "soft_prompt",
+                                "learning_rate": 3e-4,
+                                "unfrozen_modules": [
+                                    "deltas",
+                                ],
+                                "output_dir": "outputs/soft_prompt/bert-base-cased/",
+                            })
+
+if __name__ == "__main__":
+    # Expand the selected AllConfigs entry into one JSON config file per task.
+    import argparse
+    import json
+    import os
+
+    parser = argparse.ArgumentParser("Parser to generate configuration")
+    # Reject a missing/unknown job up front instead of failing with a bare KeyError.
+    parser.add_argument("--job", type=str, required=True, choices=list(AllConfigs))
+    args = parser.parse_args()
+    config = AllConfigs[args.job]
+
+    # Tuple keys name per-task fields; their value yields one row of values per task.
+    per_task_keys = [key for key in config if isinstance(key, tuple)]
+    all_config_jsons = {}
+    for key_tuple in per_task_keys:
+        for row in config[key_tuple]:
+            # Row position 0 is taken as the job name; it also names the output file.
+            job_name = row[0]
+            # setdefault keeps fields already collected from another tuple key.
+            job_config = all_config_jsons.setdefault(job_name, {})
+            for key_name, value in zip(key_tuple, row):
+                if key_name != 'job_name':
+                    job_config[key_name] = value
+
+    # Plain keys are shared by all tasks; output_dir gets the job name appended.
+    for key, value in config.items():
+        if isinstance(key, tuple):
+            continue
+        for job_name, job_config in all_config_jsons.items():
+            job_config[key] = value + job_name if key == "output_dir" else value
+
+    # makedirs also creates a missing parent "configs/" and tolerates an existing dir.
+    os.makedirs(f"configs/{args.job}/", exist_ok=True)
+    for job_name, job_config in all_config_jsons.items():
+        with open(f"configs/{args.job}/{job_name}.json", 'w') as fout:
+            json.dump(job_config, fout, indent=4,sort_keys=True)
+
+
+
diff --git a/examples/examples_prompt/configs/gen_gpt.py b/examples/examples_prompt/configs/gen_gpt.py
new file mode 100644
index 0000000..d33e355
--- /dev/null
+++ b/examples/examples_prompt/configs/gen_gpt.py
@@ -0,0 +1,433 @@
+import collections
+import copy
+
+PATHBASE="/mnt/sfs_turbo/hsd/plm_cache/"  # overwritten by the next assignment before any use
+PATHBASE="/home/hushengding/plm_cache/"  # effective value of PATHBASE
+
+AllConfigs = {}
+
+BaseConfigs = {}
+BaseConfigs['t5-base'] = {
+                ("job_name", "task_name", "eval_dataset_name", "test_dataset_name", "num_train_epochs",
+                "max_source_length",
+                "per_device_train_batch_size", "per_device_eval_batch_size", "warmup_steps","save_steps", "eval_steps"): zip(
+                    ["superglue-boolq", "superglue-cb", "superglue-copa", "superglue-wic", "superglue-multirc", "superglue-record",
+                    "superglue-wsc.fixed", "mrpc", "cola", "sst2", "qnli", "rte",  "mnli", "qqp", "stsb"],
+                    ["superglue-boolq", "superglue-cb", "superglue-copa", "superglue-wic", "superglue-multirc", "superglue-record", "superglue-wsc.fixed", "mrpc", "cola", "sst2", "qnli", "rte",  "mnli", "qqp", "stsb"],
+                    ["superglue-boolq", "superglue-cb", "superglue-copa", "superglue-wic", "superglue-multirc", "superglue-record", "superglue-wsc.fixed", "mrpc", "cola", "sst2", "qnli", "rte",  "mnli", "qqp", "stsb"],
+                    ["superglue-boolq", "superglue-cb", "superglue-copa", "superglue-wic", "superglue-multirc", "superglue-record", "superglue-wsc.fixed", "mrpc", "cola", "sst2", "qnli", "rte", "mnli", "qqp", "stsb"],
+                    [ 20,  20,  40,  20,   3,   3,  20,  20,  20,   3,   3,  20,   3,   3,  20],
+                    [256, 256, 256, 256, 256, 512, 256, 128, 128, 128, 128, 128, 128, 128, 128],
+                    [ 32,  32,  32,  32,  32,  16,  32] + [32] * 8,
+                    [ 32,  32,  32,  32,  32,  16,  32] + [32] * 8,
+                    [0] *7 +[0] *8,
+                    [200, 100, 50, 100, 200, 200, 100, 200, 100, 200, 200, 100, 200, 200, 100],
+                    [200, 100, 50, 100, 200, 200, 100, 200, 100, 200, 200, 100, 200, 200, 100],
+                ),
+                "do_train": True,
+                "do_eval": True,
+                "do_test": True,
+
+                "model_name_or_path": f"{PATHBASE}t5-base",
+                "tokenizer_name": f"{PATHBASE}t5-base",
+                "save_total_limit": 1,
+                # For glue datasets.
+                "split_validation_test": True,
+                "seed": 42,
+                "dataset_config_name": ["en"],
+                "eval_dataset_config_name": ["en"],
+                "test_dataset_config_name": ["en"],
+                # other configurations.
+                "predict_with_generate": True,
+                # To evaluate during training.
+                "load_best_model_at_end": True,
+                "metric_for_best_model": "average_metrics",
+                "greater_is_better": True,
+                "evaluation_strategy": "steps",
+                "overwrite_output_dir": True,
+                "push_to_hub": True,
+                "save_strategy": "steps"
+            }
+
+AllConfigs['bitfit_t5-base'] = copy.deepcopy(BaseConfigs['t5-base'])
+AllConfigs['bitfit_t5-base'].update({
+                "delta_type": "bitfit",
+                "learning_rate": 3e-4,
+                "output_dir": "outputs/bitfit/t5-base/",
+            })
+
+AllConfigs['adapter_t5-base'] = copy.deepcopy(BaseConfigs['t5-base'])
+AllConfigs['adapter_t5-base'].update({
+                                "delta_type": "adapter",
+                                "learning_rate": 3e-4,
+                                "unfrozen_modules": [
+                                    "deltas",
+                                    "layer_norm",
+                                    "final_layer_norm"
+                                ],
+                                "bottleneck_dim":24,
+                                "output_dir": "outputs/adapter/t5-base/",
+                            })
+
+AllConfigs['lora_t5-base'] = copy.deepcopy(BaseConfigs['t5-base'])
+AllConfigs['lora_t5-base'].update({
+                                "delta_type": "lora",
+                                "learning_rate": 3e-4,
+                                "unfrozen_modules": [
+                                    "deltas",
+                                    "layer_norm",
+                                    "final_layer_norm"
+                                ],
+                                "lora_r": 8,
+                                "output_dir": "outputs/lora/t5-base/",
+                            })
+
+AllConfigs['compacter_t5-base'] = copy.deepcopy(BaseConfigs['t5-base'])
+AllConfigs['compacter_t5-base'].update({
+                                "delta_type": "compacter",
+                                "learning_rate": 3e-3,
+                                "unfrozen_modules": [
+                                    "deltas",
+                                    "layer_norm",
+                                    "final_layer_norm"
+                                ],
+                                "output_dir": "outputs/compacter/t5-base/",
+                                "non_linearity": "gelu_new",
+
+                                #Compacter.
+                                "hypercomplex_division": 4,
+                                "hypercomplex_adapters": True,
+                                "hypercomplex_nonlinearity": "glorot-uniform",
+                                # gradient clip and clamp
+                                "gradient_clip": False,
+                                "phm_clamp": False,
+                                "normalize_phm_weight": False,
+                                "learn_phm": True,
+                                # shared one side
+                                "factorized_phm": True,
+                                "shared_phm_rule": False,
+                                "factorized_phm_rule": False,
+                                "phm_c_init": "normal",
+                                "phm_init_range": 0.0001,
+                                "use_bias_down_sampler": True,
+                                "use_bias_up_sampler": True,
+                            })
+
+AllConfigs['compacter++_t5-base'] = copy.deepcopy(BaseConfigs['t5-base'])
+AllConfigs['compacter++_t5-base'].update({
+                                "delta_type": "compacter",
+                                "learning_rate": 3e-3,
+                                "do_train": True,
+                                "do_eval": True,
+                                "do_test": True,
+                                "modified_modules": [
+                                    "DenseReluDense"
+                                ],
+                                "unfrozen_modules": [
+                                    "deltas",
+                                    "layer_norm",
+                                    "final_layer_norm"
+                                ],
+                                "output_dir": "outputs/compacter++/t5-base/",
+                                "non_linearity": "gelu_new",
+
+                                #Compacter.
+                                "hypercomplex_division": 4,
+                                "hypercomplex_adapters": True,
+                                "hypercomplex_nonlinearity": "glorot-uniform",
+                                # gradient clip and clamp
+                                "gradient_clip": False,
+                                "phm_clamp": False,
+                                "normalize_phm_weight": False,
+                                "learn_phm": True,
+                                # shared one side
+                                "factorized_phm": True,
+                                "shared_phm_rule": False,
+                                "factorized_phm_rule": False,
+                                "phm_c_init": "normal",
+                                "phm_init_range": 0.0001,
+                                "use_bias_down_sampler": True,
+                                "use_bias_up_sampler": True,
+                            })
+
+
+AllConfigs['low_rank_adapter_t5-base'] = copy.deepcopy(BaseConfigs['t5-base'])
+AllConfigs['low_rank_adapter_t5-base'].update({
+                                "delta_type": "low_rank_adapter",
+                                "learning_rate": 3e-4,
+                                "unfrozen_modules": [
+                                    "deltas",
+                                    "layer_norm",
+                                    "final_layer_norm"
+                                ],
+                                "output_dir": "outputs/low_rank_adapter/t5-base/",
+                                "non_linearity": "gelu_new",
+                                "low_rank_w_init": "glorot-uniform",
+                                "low_rank_rank": 1,
+                            })
+
+
+AllConfigs['soft_prompt_t5-base'] = copy.deepcopy(BaseConfigs['t5-base'])
+AllConfigs['soft_prompt_t5-base'].update({
+                                "delta_type": "soft_prompt",
+                                "learning_rate": 3e-2,
+                                "soft_token_num":100,
+                                "token_init": False,
+                                "unfrozen_modules": [
+                                    "deltas",
+                                ],
+                                "output_dir": "outputs/soft_prompt/t5-base/",
+                            })
+
+AllConfigs['prefix_t5-base'] = copy.deepcopy(BaseConfigs['t5-base'])
+AllConfigs['prefix_t5-base'].update({
+                                "delta_type": "prefix",
+                                "learning_rate": 3e-4,
+                                "unfrozen_modules": [
+                                    "deltas",
+                                ],
+                                "output_dir": "outputs/prefix/t5-base/",
+                            })
+
+#### T5-small
+BaseConfigs['t5-small'] = {
+                ("job_name", "task_name", "eval_dataset_name", "test_dataset_name", "num_train_epochs",
+                "max_source_length",
+                "per_device_train_batch_size", "per_device_eval_batch_size", "warmup_steps","save_steps", "eval_steps"): zip(
+                    ["superglue-boolq", "superglue-cb", "superglue-copa", "superglue-wic", "superglue-multirc", "superglue-record",
+                    "superglue-wsc.fixed", "mrpc", "cola", "sst2", "qnli", "rte",  "mnli", "qqp", "stsb"],
+                    ["superglue-boolq", "superglue-cb", "superglue-copa", "superglue-wic", "superglue-multirc", "superglue-record", "superglue-wsc.fixed", "mrpc", "cola", "sst2", "qnli", "rte",  "mnli", "qqp", "stsb"],
+                    ["superglue-boolq", "superglue-cb", "superglue-copa", "superglue-wic", "superglue-multirc", "superglue-record", "superglue-wsc.fixed", "mrpc", "cola", "sst2", "qnli", "rte",  "mnli", "qqp", "stsb"],
+                    ["superglue-boolq", "superglue-cb", "superglue-copa", "superglue-wic", "superglue-multirc", "superglue-record", "superglue-wsc.fixed", "mrpc", "cola", "sst2", "qnli", "rte", "mnli", "qqp", "stsb"],
+                    [ 20,  20,  40,  20,   3,   3,  20,  20,  20,   3,   3,  20,   3,   3,  20],
+                    [256, 256, 256, 256, 256, 512, 256, 128, 128, 128, 128, 128, 128, 128, 128],
+                    [ 32,  32,  32,  32,  32,  16,  32] + [32] * 8,
+                    [ 32,  32,  32,  32,  32,  16,  32] + [32] * 8,
+                    [0] *7 +[0] *8,
+                    [200, 100, 50, 100, 200, 200, 100, 200, 100, 200, 200, 100, 200, 200, 100],
+                    [200, 100, 50, 100, 200, 200, 100, 200, 100, 200, 200, 100, 200, 200, 100],
+                ),
+                "do_train": True,
+                "do_eval": True,
+                "do_test": True,
+
+                "model_name_or_path": f"{PATHBASE}t5-small",
+                "tokenizer_name": f"{PATHBASE}t5-small",
+                "save_total_limit": 1,
+                # For glue datasets.
+                "split_validation_test": True,
+                "seed": 42,
+                "dataset_config_name": ["en"],
+                "eval_dataset_config_name": ["en"],
+                "test_dataset_config_name": ["en"],
+                # other configurations.
+                "predict_with_generate": True,
+                # To evaluate during training.
+                "load_best_model_at_end": True,
+                "metric_for_best_model": "average_metrics",
+                "greater_is_better": True,
+                "evaluation_strategy": "steps",
+                "overwrite_output_dir": True,
+                "push_to_hub": True,
+                "save_strategy": "steps"
+            }
+
+AllConfigs['prefix_t5-small'] = copy.deepcopy(BaseConfigs['t5-small'])
+AllConfigs['prefix_t5-small'].update({
+                                "delta_type": "prefix",
+                                "learning_rate": 3e-4,
+                                "unfrozen_modules": [
+                                    "deltas",
+                                ],
+                                "output_dir": "outputs/prefix/t5-small/",
+                            })
+
+
+
+
+#### ROBERTA######
+BaseConfigs['roberta-base'] = {
+                ("job_name", "task_name", "eval_dataset_name", "test_dataset_name", "num_train_epochs",
+                "max_source_length",
+                "per_device_train_batch_size", "per_device_eval_batch_size", "warmup_steps","save_steps", "eval_steps"): zip(
+                    ["superglue-boolq", "superglue-cb", "superglue-copa", "superglue-wic", "superglue-multirc", "superglue-record",
+                    "superglue-wsc.fixed", "mrpc", "cola", "sst2", "qnli", "rte",  "mnli", "qqp", "stsb"],
+                    ["superglue-boolq", "superglue-cb", "superglue-copa", "superglue-wic", "superglue-multirc", "superglue-record", "superglue-wsc.fixed", "mrpc", "cola", "sst2", "qnli", "rte",  "mnli", "qqp", "stsb"],
+                    ["superglue-boolq", "superglue-cb", "superglue-copa", "superglue-wic", "superglue-multirc", "superglue-record", "superglue-wsc.fixed", "mrpc", "cola", "sst2", "qnli", "rte",  "mnli", "qqp", "stsb"],
+                    ["superglue-boolq", "superglue-cb", "superglue-copa", "superglue-wic", "superglue-multirc", "superglue-record", "superglue-wsc.fixed", "mrpc", "cola", "sst2", "qnli", "rte", "mnli", "qqp", "stsb"],
+                    [ 20,  20,  40,  20,   3,   3,  20,  20,  20,   3,   3,  20,   3,   3,  20],
+                    [256, 256, 256, 256, 256, 512, 256, 128, 128, 128, 128, 128, 128, 128, 128],
+                    [ 32,  32,  32,  32,  32,  16,  32] + [32] * 8,
+                    [ 32,  32,  32,  32,  32,  16,  32] + [32] * 8,
+                    [0] *7 +[0] *8,
+                    [200, 100, 50, 100, 200, 200, 100, 200, 100, 200, 200, 100, 200, 200, 100],
+                    [200, 100, 50, 100, 200, 200, 100, 200, 100, 200, 200, 100, 200, 200, 100],
+                ),
+                "do_train": True,
+                "do_eval": True,
+                "do_test": True,
+
+                "model_name_or_path": f"{PATHBASE}roberta-base",
+                "tokenizer_name": f"{PATHBASE}roberta-base",
+                "save_total_limit": 1,
+                # For glue datasets.
+                "is_seq2seq": False,
+                "split_validation_test": True,
+                "seed": 42,
+                "dataset_config_name": ["en"],
+                "eval_dataset_config_name": ["en"],
+                "test_dataset_config_name": ["en"],
+                # other configurations.
+                "predict_with_generate": False,
+                # To evaluate during training.
+                "load_best_model_at_end": True,
+                "metric_for_best_model": "average_metrics",
+                "greater_is_better": True,
+                "evaluation_strategy": "steps",
+                "overwrite_output_dir": True,
+                "push_to_hub": True,
+                "save_strategy": "steps"
+            }
+
+
+
+AllConfigs['bitfit_roberta-base'] = copy.deepcopy(BaseConfigs['roberta-base'])
+AllConfigs['bitfit_roberta-base'].update({
+                "delta_type": "bitfit",
+                "learning_rate": 1e-3,
+                "output_dir": "outputs/bitfit/roberta-base/",
+            })
+
+AllConfigs['none_roberta-base'] = copy.deepcopy(BaseConfigs['roberta-base'])
+AllConfigs['none_roberta-base'].update({
+                "delta_type": "none",
+                "learning_rate": 1e-5,
+                "output_dir": "outputs/none/roberta-base/",
+            })
+
+
+AllConfigs['lora_roberta-base'] = copy.deepcopy(BaseConfigs['roberta-base'])
+AllConfigs['lora_roberta-base'].update({
+                "delta_type": "lora",
+                "learning_rate": 1e-3,
+                "output_dir": "outputs/lora/roberta-base/",
+            })
+
+AllConfigs['adapter_roberta-base'] = copy.deepcopy(BaseConfigs['roberta-base'])
+AllConfigs['adapter_roberta-base'].update({
+                "delta_type": "adapter",
+                "learning_rate": 1e-3,
+                "output_dir": "outputs/adapter/roberta-base/",
+            })
+
+AllConfigs['low_rank_adapter_roberta-base'] = copy.deepcopy(BaseConfigs['roberta-base'])
+AllConfigs['low_rank_adapter_roberta-base'].update({
+                "delta_type": "low_rank_adapter",
+                "learning_rate": 1e-3,
+                "output_dir": "outputs/low_rank_adapter/roberta-base/",
+            })
+
+#### BERT######
+BaseConfigs['bert-base-cased'] = {
+                ("job_name", "task_name", "eval_dataset_name", "test_dataset_name", "num_train_epochs",
+                "max_source_length",
+                "per_device_train_batch_size", "per_device_eval_batch_size", "warmup_steps","save_steps", "eval_steps"): zip(
+                    ["superglue-boolq", "superglue-cb", "superglue-copa", "superglue-wic", "superglue-multirc", "superglue-record",
+                    "superglue-wsc.fixed", "mrpc", "cola", "sst2", "qnli", "rte",  "mnli", "qqp", "stsb"],
+                    ["superglue-boolq", "superglue-cb", "superglue-copa", "superglue-wic", "superglue-multirc", "superglue-record", "superglue-wsc.fixed", "mrpc", "cola", "sst2", "qnli", "rte",  "mnli", "qqp", "stsb"],
+                    ["superglue-boolq", "superglue-cb", "superglue-copa", "superglue-wic", "superglue-multirc", "superglue-record", "superglue-wsc.fixed", "mrpc", "cola", "sst2", "qnli", "rte",  "mnli", "qqp", "stsb"],
+                    ["superglue-boolq", "superglue-cb", "superglue-copa", "superglue-wic", "superglue-multirc", "superglue-record", "superglue-wsc.fixed", "mrpc", "cola", "sst2", "qnli", "rte", "mnli", "qqp", "stsb"],
+                    [ 20,  20,  40,  20,   3,   3,  20,  20,  20,   3,   3,  20,   3,   3,  20],
+                    [256, 256, 256, 256, 256, 512, 256, 128, 128, 128, 128, 128, 128, 128, 128],
+                    [ 32,  32,  32,  32,  32,  16,  32] + [32] * 8,
+                    [ 32,  32,  32,  32,  32,  16,  32] + [32] * 8,
+                    [0] *7 +[0] *8,
+                    [200, 100, 50, 100, 200, 200, 100, 200, 100, 200, 200, 100, 200, 200, 100],
+                    [200, 100, 50, 100, 200, 200, 100, 200, 100, 200, 200, 100, 200, 200, 100],
+                ),
+                "do_train": True,
+                "do_eval": True,
+                "do_test": True,
+
+                "model_name_or_path": f"{PATHBASE}bert-base-cased",
+                "tokenizer_name": f"{PATHBASE}bert-base-cased",
+                "save_total_limit": 1,
+                # For glue datasets.
+                "is_seq2seq": False,
+                "split_validation_test": True,
+                "seed": 42,
+                "dataset_config_name": ["en"],
+                "eval_dataset_config_name": ["en"],
+                "test_dataset_config_name": ["en"],
+                # other configurations.
+                "predict_with_generate": False,
+                # To evaluate during training.
+                "load_best_model_at_end": True,
+                "metric_for_best_model": "average_metrics",
+                "greater_is_better": True,
+                "evaluation_strategy": "steps",
+                "overwrite_output_dir": True,
+                "push_to_hub": True,
+                "save_strategy": "steps"
+            }
+
+AllConfigs['prefix_bert-base-cased'] = copy.deepcopy(BaseConfigs['bert-base-cased'])
+AllConfigs['prefix_bert-base-cased'].update({
+                                "delta_type": "prefix",
+                                "learning_rate": 3e-4,
+                                "unfrozen_modules": [
+                                    "deltas",
+                                ],
+                                "output_dir": "outputs/prefix/bert-base-cased/",
+                            })
+
+AllConfigs['soft_prompt_bert-base-cased'] = copy.deepcopy(BaseConfigs['bert-base-cased'])
+AllConfigs['soft_prompt_bert-base-cased'].update({
+                                "delta_type": "soft_prompt",
+                                "learning_rate": 3e-4,
+                                "unfrozen_modules": [
+                                    "deltas",
+                                ],
+                                "output_dir": "outputs/soft_prompt/bert-base-cased/",
+                            })
+
+if __name__ == "__main__":
+    # Expand the selected AllConfigs entry into one JSON config file per task.
+    import argparse
+    import json
+    import os
+
+    parser = argparse.ArgumentParser("Parser to generate configuration")
+    # Reject a missing/unknown job up front instead of failing with a bare KeyError.
+    parser.add_argument("--job", type=str, required=True, choices=list(AllConfigs))
+    args = parser.parse_args()
+    config = AllConfigs[args.job]
+
+    # Tuple keys name per-task fields; their value yields one row of values per task.
+    per_task_keys = [key for key in config if isinstance(key, tuple)]
+    all_config_jsons = {}
+    for key_tuple in per_task_keys:
+        for row in config[key_tuple]:
+            # Row position 0 is taken as the job name; it also names the output file.
+            job_name = row[0]
+            # setdefault keeps fields already collected from another tuple key.
+            job_config = all_config_jsons.setdefault(job_name, {})
+            for key_name, value in zip(key_tuple, row):
+                if key_name != 'job_name':
+                    job_config[key_name] = value
+
+    # Plain keys are shared by all tasks; output_dir gets the job name appended.
+    for key, value in config.items():
+        if isinstance(key, tuple):
+            continue
+        for job_name, job_config in all_config_jsons.items():
+            job_config[key] = value + job_name if key == "output_dir" else value
+
+    # makedirs also creates a missing parent "configs/" and tolerates an existing dir.
+    os.makedirs(f"configs/{args.job}/", exist_ok=True)
+    for job_name, job_config in all_config_jsons.items():
+        with open(f"configs/{args.job}/{job_name}.json", 'w') as fout:
+            json.dump(job_config, fout, indent=4,sort_keys=True)
+
+
+
diff --git a/examples/examples_prompt/configs/gen_roberta.py b/examples/examples_prompt/configs/gen_roberta.py
new file mode 100644
index 0000000..c21baa5
--- /dev/null
+++ b/examples/examples_prompt/configs/gen_roberta.py
@@ -0,0 +1,143 @@
+import collections
+import copy
+
+PATHBASE="/mnt/sfs_turbo/hsd/plm_cache/"  # dead value: reassigned on the next line
+PATHBASE="/home/hushengding/plm_cache/"  # effective prefix of the model/tokenizer paths below
+
+AllConfigs = {}  # job key (e.g. 'bitfit_roberta-base') -> config template; selected via --job
+
+BaseConfigs = {}  # model name -> settings deep-copied into that model's AllConfigs entries
+
+#### ROBERTA######
+BaseConfigs['roberta-base'] = {  # settings shared by every roberta-base entry of AllConfigs
+                ("job_name", "task_name", "eval_dataset_name", "test_dataset_name", "num_train_epochs",
+                "max_source_length",
+                "per_device_train_batch_size", "per_device_eval_batch_size", "warmup_steps","save_steps", "eval_steps"): zip(  # one list per field named in this tuple key, in the same order
+                    ["superglue-boolq", "superglue-cb", "superglue-copa", "superglue-wic", "superglue-multirc", "superglue-record",
+                    "superglue-wsc.fixed", "mrpc", "cola", "sst2", "qnli", "rte",  "mnli", "qqp", "stsb"],  # job_name
+                    ["superglue-boolq", "superglue-cb", "superglue-copa", "superglue-wic", "superglue-multirc", "superglue-record", "superglue-wsc.fixed", "mrpc", "cola", "sst2", "qnli", "rte",  "mnli", "qqp", "stsb"],  # task_name
+                    ["superglue-boolq", "superglue-cb", "superglue-copa", "superglue-wic", "superglue-multirc", "superglue-record", "superglue-wsc.fixed", "mrpc", "cola", "sst2", "qnli", "rte",  "mnli", "qqp", "stsb"],  # eval_dataset_name
+                    ["superglue-boolq", "superglue-cb", "superglue-copa", "superglue-wic", "superglue-multirc", "superglue-record", "superglue-wsc.fixed", "mrpc", "cola", "sst2", "qnli", "rte", "mnli", "qqp", "stsb"],  # test_dataset_name
+                    [ 20,  20,  40,  20,   3,   3,  20,  20,  20,   3,   3,  20,   3,   3,  20],  # num_train_epochs
+                    [256, 256, 256, 256, 256, 512, 256, 128, 128, 128, 128, 128, 128, 128, 128],  # max_source_length
+                    [ 32,  32,  32,  32,  32,  16,  32] + [32] * 8,  # per_device_train_batch_size
+                    [ 32,  32,  32,  32,  32,  16,  32] + [32] * 8,  # per_device_eval_batch_size
+                    [0] *7 +[0] *8,  # warmup_steps
+                    [200, 100, 50, 100, 200, 200, 100, 200, 100, 200, 200, 100, 200, 200, 100],  # save_steps
+                    [200, 100, 50, 100, 200, 200, 100, 200, 100, 200, 200, 100, 200, 200, 100],  # eval_steps
+                ),  # each zipped row describes one task; __main__ turns it into one JSON file
+                "do_train": True,
+                "do_eval": True,
+                "do_test": True,
+
+                "model_name_or_path": f"{PATHBASE}roberta-base",
+                "tokenizer_name": f"{PATHBASE}roberta-base",
+                "save_total_limit": 1,
+                # For glue datasets.
+                "is_seq2seq": False,
+                "split_validation_test": True,
+                "seed": 42,
+                "dataset_config_name": ["en"],
+                "eval_dataset_config_name": ["en"],
+                "test_dataset_config_name": ["en"],
+                # other configurations.
+                "predict_with_generate": False,
+                # To evaluate during training.
+                "load_best_model_at_end": True,
+                "metric_for_best_model": "average_metrics",
+                "greater_is_better": True,
+                "evaluation_strategy": "steps",
+                "overwrite_output_dir": True,
+                "push_to_hub": True,
+                "save_strategy": "steps"
+            }
+
+
+
+AllConfigs['bitfit_roberta-base'] = copy.deepcopy(BaseConfigs['roberta-base'])
+AllConfigs['bitfit_roberta-base'].update(
+                delta_type="bitfit",
+                learning_rate=1e-3,
+                output_dir="outputs/bitfit/roberta-base/",  # prefix; __main__ appends the job name
+            )
+
+AllConfigs['none_roberta-base'] = copy.deepcopy(BaseConfigs['roberta-base'])
+AllConfigs['none_roberta-base'].update(
+                delta_type="none",
+                learning_rate=1e-5,
+                output_dir="outputs/none/roberta-base/",
+            )
+
+
+AllConfigs['lora_roberta-base'] = copy.deepcopy(BaseConfigs['roberta-base'])
+AllConfigs['lora_roberta-base'].update(
+                delta_type="lora",
+                learning_rate=1e-3,
+                output_dir="outputs/lora/roberta-base/",
+            )
+
+AllConfigs['adapter_roberta-base'] = copy.deepcopy(BaseConfigs['roberta-base'])
+AllConfigs['adapter_roberta-base'].update(
+                delta_type="adapter",
+                learning_rate=1e-3,
+                output_dir="outputs/adapter/roberta-base/",
+            )
+
+AllConfigs['low_rank_adapter_roberta-base'] = copy.deepcopy(BaseConfigs['roberta-base'])
+AllConfigs['low_rank_adapter_roberta-base'].update(
+                delta_type="low_rank_adapter",
+                learning_rate=1e-3,
+                output_dir="outputs/low_rank_adapter/roberta-base/",
+            )
+
+
+AllConfigs['soft_prompt_roberta-base'] = copy.deepcopy(BaseConfigs['roberta-base'])
+AllConfigs['soft_prompt_roberta-base'].update(
+                                delta_type="soft_prompt",
+                                learning_rate=3e-4,
+                                unfrozen_modules=[
+                                    "deltas",
+                                ],
+                                output_dir="outputs/soft_prompt/roberta-base/",
+                            )
+
+if __name__ == "__main__":
+    # Expand one AllConfigs entry into one JSON file per task under configs/<job>/.
+    import argparse
+    import json
+    import os
+    parser = argparse.ArgumentParser("Parser to generate configuration")
+    # Unknown or missing --job now yields a usage error instead of a KeyError.
+    parser.add_argument("--job", type=str, required=True, choices=sorted(AllConfigs))
+    args = parser.parse_args()
+    config = AllConfigs[args.job]
+
+    # Tuple keys list per-task field names; their value iterates over rows of values.
+    per_task_keys = [key for key in config if isinstance(key, tuple)]
+    all_config_jsons = {}
+    for key_tuple in per_task_keys:
+        for row in config[key_tuple]:
+            # The first element of a row is the job name; it is used as the file name.
+            job_name = row[0]
+            all_config_jsons[job_name] = {
+                name: value for name, value in zip(key_tuple, row) if name != 'job_name'
+            }
+    # Every non-tuple key is shared by all tasks of the job.
+    for key, value in config.items():
+        if isinstance(key, tuple):
+            continue
+        for job_name, job_config in all_config_jsons.items():
+            # "output_dir" is a prefix: the job name is appended for each task.
+            job_config[key] = value + job_name if key == "output_dir" else value
+
+    # makedirs also creates a missing "configs/" parent, where os.mkdir would fail.
+    out_dir = f"configs/{args.job}/"
+    os.makedirs(out_dir, exist_ok=True)
+
+    # Write each task's settings as an indented, key-sorted JSON document.
+    for job_name, job_config in all_config_jsons.items():
+        with open(f"{out_dir}{job_name}.json", 'w') as fout:
+            json.dump(job_config, fout, indent=4, sort_keys=True)
+
+
+
diff --git a/examples/examples_prompt/configs/gen_t5.py b/examples/examples_prompt/configs/gen_t5.py
new file mode 100644
index 0000000..b2e15a5
--- /dev/null
+++ b/examples/examples_prompt/configs/gen_t5.py
@@ -0,0 +1,444 @@
+import collections
+import copy
+
+PATHBASE="/mnt/sfs_turbo/hsd/plm_cache/"  # dead value: reassigned on the next line
+PATHBASE="/home/hushengding/plm_cache/"  # effective prefix of the model/tokenizer paths below
+
+AllConfigs = {}  # job key (e.g. 'bitfit_t5-base') -> config template; selected via --job
+
+BaseConfigs = {}  # model name -> settings deep-copied into that model's AllConfigs entries
+BaseConfigs['t5-base'] = {  # settings shared by every t5-base entry of AllConfigs
+                ("job_name", "task_name", "eval_dataset_name", "test_dataset_name", "num_train_epochs",
+                "max_source_length",
+                "per_device_train_batch_size", "per_device_eval_batch_size", "warmup_steps","save_steps", "eval_steps"): zip(  # one list per field named in this tuple key, in the same order
+                    ["superglue-boolq", "superglue-cb", "superglue-copa", "superglue-wic", "superglue-multirc", "superglue-record",
+                    "superglue-wsc.fixed", "mrpc", "cola", "sst2", "qnli", "rte",  "mnli", "qqp", "stsb"],  # job_name
+                    ["superglue-boolq", "superglue-cb", "superglue-copa", "superglue-wic", "superglue-multirc", "superglue-record", "superglue-wsc.fixed", "mrpc", "cola", "sst2", "qnli", "rte",  "mnli", "qqp", "stsb"],  # task_name
+                    ["superglue-boolq", "superglue-cb", "superglue-copa", "superglue-wic", "superglue-multirc", "superglue-record", "superglue-wsc.fixed", "mrpc", "cola", "sst2", "qnli", "rte",  "mnli", "qqp", "stsb"],  # eval_dataset_name
+                    ["superglue-boolq", "superglue-cb", "superglue-copa", "superglue-wic", "superglue-multirc", "superglue-record", "superglue-wsc.fixed", "mrpc", "cola", "sst2", "qnli", "rte", "mnli", "qqp", "stsb"],  # test_dataset_name
+                    [ 20,  20,  40,  20,   3,   3,  20,  20,  20,   3,   3,  20,   3,   3,  20],  # num_train_epochs
+                    [256, 256, 256, 256, 256, 512, 256, 128, 128, 128, 128, 128, 128, 128, 128],  # max_source_length
+                    [ 32,  32,  32,  32,  32,  16,  32] + [32] * 8,  # per_device_train_batch_size
+                    [ 32,  32,  32,  32,  32,  16,  32] + [32] * 8,  # per_device_eval_batch_size
+                    [0] *7 +[0] *8,  # warmup_steps
+                    [200, 100, 50, 100, 200, 200, 100, 200, 100, 200, 200, 100, 200, 200, 100],  # save_steps
+                    [200, 100, 50, 100, 200, 200, 100, 200, 100, 200, 200, 100, 200, 200, 100],  # eval_steps
+                ),  # each zipped row describes one task; __main__ turns it into one JSON file
+                "do_train": True,
+                "do_eval": True,
+                "do_test": True,
+
+                "model_name_or_path": f"{PATHBASE}t5-base",
+                "tokenizer_name": f"{PATHBASE}t5-base",
+                "save_total_limit": 1,
+                # For glue datasets.
+                "split_validation_test": True,
+                "seed": 42,
+                "dataset_config_name": ["en"],
+                "eval_dataset_config_name": ["en"],
+                "test_dataset_config_name": ["en"],
+                # other configurations.
+                "predict_with_generate": True,
+                # To evaluate during training.
+                "load_best_model_at_end": True,
+                "metric_for_best_model": "average_metrics",
+                "greater_is_better": True,
+                "evaluation_strategy": "steps",
+                "overwrite_output_dir": True,
+                "push_to_hub": False,
+                "push_to_delta_center": True,
+                "save_strategy": "steps"
+            }
+
+AllConfigs['bitfit_t5-base'] = copy.deepcopy(BaseConfigs['t5-base'])
+AllConfigs['bitfit_t5-base'].update(
+                delta_type="bitfit",
+                learning_rate=3e-4,
+                output_dir="outputs/bitfit/t5-base/",  # prefix; __main__ appends the job name
+            )
+
+AllConfigs['adapter_t5-base'] = copy.deepcopy(BaseConfigs['t5-base'])
+AllConfigs['adapter_t5-base'].update(
+                                delta_type="adapter",
+                                learning_rate=3e-4,
+                                unfrozen_modules=[
+                                    "deltas",
+                                    "layer_norm",
+                                    "final_layer_norm"
+                                ],
+                                bottleneck_dim=24,
+                                output_dir="outputs/adapter/t5-base/",
+                            )
+
+AllConfigs['lora_t5-base'] = copy.deepcopy(BaseConfigs['t5-base'])
+AllConfigs['lora_t5-base'].update(
+                                delta_type="lora",
+                                learning_rate=3e-4,
+                                unfrozen_modules=[
+                                    "deltas",
+                                    "layer_norm",
+                                    "final_layer_norm"
+                                ],
+                                lora_r=8,
+                                output_dir="outputs/lora/t5-base/",
+                            )
+
+AllConfigs['compacter_t5-base'] = copy.deepcopy(BaseConfigs['t5-base'])
+AllConfigs['compacter_t5-base'].update(
+                                delta_type="compacter",
+                                learning_rate=3e-3,
+                                unfrozen_modules=[
+                                    "deltas",
+                                    "layer_norm",
+                                    "final_layer_norm"
+                                ],
+                                output_dir="outputs/compacter/t5-base/",
+                                non_linearity="gelu_new",
+
+                                # Compacter-specific options.
+                                hypercomplex_division=4,
+                                hypercomplex_adapters=True,
+                                hypercomplex_nonlinearity="glorot-uniform",
+                                # Gradient clip and clamp.
+                                gradient_clip=False,
+                                phm_clamp=False,
+                                normalize_phm_weight=False,
+                                learn_phm=True,
+                                # Shared one side.
+                                factorized_phm=True,
+                                shared_phm_rule=False,
+                                factorized_phm_rule=False,
+                                phm_c_init="normal",
+                                phm_init_range=0.0001,
+                                use_bias_down_sampler=True,
+                                use_bias_up_sampler=True,
+                            )
+
+AllConfigs['compacter++_t5-base'] = copy.deepcopy(BaseConfigs['t5-base'])
+AllConfigs['compacter++_t5-base'].update(
+                                delta_type="compacter",
+                                learning_rate=3e-3,
+                                do_train=True,
+                                do_eval=True,
+                                do_test=True,
+                                modified_modules=[
+                                    "DenseReluDense"
+                                ],
+                                unfrozen_modules=[
+                                    "deltas",
+                                    "layer_norm",
+                                    "final_layer_norm"
+                                ],
+                                output_dir="outputs/compacter++/t5-base/",
+                                non_linearity="gelu_new",
+
+                                # Compacter-specific options.
+                                hypercomplex_division=4,
+                                hypercomplex_adapters=True,
+                                hypercomplex_nonlinearity="glorot-uniform",
+                                # Gradient clip and clamp.
+                                gradient_clip=False,
+                                phm_clamp=False,
+                                normalize_phm_weight=False,
+                                learn_phm=True,
+                                # Shared one side.
+                                factorized_phm=True,
+                                shared_phm_rule=False,
+                                factorized_phm_rule=False,
+                                phm_c_init="normal",
+                                phm_init_range=0.0001,
+                                use_bias_down_sampler=True,
+                                use_bias_up_sampler=True,
+                            )
+
+
+AllConfigs['low_rank_adapter_t5-base'] = copy.deepcopy(BaseConfigs['t5-base'])
+AllConfigs['low_rank_adapter_t5-base'].update(
+                                delta_type="low_rank_adapter",
+                                learning_rate=3e-4,
+                                unfrozen_modules=[
+                                    "deltas",
+                                    "layer_norm",
+                                    "final_layer_norm"
+                                ],
+                                output_dir="outputs/low_rank_adapter/t5-base/",  # prefix; __main__ appends the job name
+                                non_linearity="gelu_new",
+                                low_rank_w_init="glorot-uniform",
+                                low_rank_rank=1,
+                            )
+
+
+AllConfigs['soft_prompt_t5-base'] = copy.deepcopy(BaseConfigs['t5-base'])  # NOTE(review): this entry is replaced by a second 'soft_prompt_t5-base' definition further down
+AllConfigs['soft_prompt_t5-base'].update({
+                                "delta_type": "soft_prompt",
+                                "learning_rate": 3e-2,
+                                "soft_token_num":100,
+                                "token_init": False,
+                                "unfrozen_modules": [
+                                    "deltas",
+                                ],
+                                "output_dir": "outputs/soft_prompt/t5-base/",
+                            })
+
+AllConfigs['prefix_t5-base'] = copy.deepcopy(BaseConfigs['t5-base'])
+AllConfigs['prefix_t5-base'].update({
+                                "delta_type": "prefix",
+                                "learning_rate": 3e-4,
+                                "unfrozen_modules": [
+                                    "deltas",
+                                ],
+                                "output_dir": "outputs/prefix/t5-base/",  # prefix; __main__ appends the job name
+                            })
+
+AllConfigs['soft_prompt_t5-base'] = copy.deepcopy(BaseConfigs['t5-base'])  # NOTE(review): duplicate key; this later definition wins and drops soft_token_num/token_init -- confirm which one is intended
+AllConfigs['soft_prompt_t5-base'].update({
+                                "delta_type": "soft_prompt",
+                                "learning_rate": 3e-4,  # the earlier definition of this key used 3e-2
+                                "unfrozen_modules": [
+                                    "deltas",
+                                ],
+                                "output_dir": "outputs/soft_prompt/t5-base/",
+                            })
+#### T5-small
+BaseConfigs['t5-small'] = {  # settings shared by every t5-small entry of AllConfigs
+                ("job_name", "task_name", "eval_dataset_name", "test_dataset_name", "num_train_epochs",
+                "max_source_length",
+                "per_device_train_batch_size", "per_device_eval_batch_size", "warmup_steps","save_steps", "eval_steps"): zip(  # one list per field named in this tuple key, in the same order
+                    ["superglue-boolq", "superglue-cb", "superglue-copa", "superglue-wic", "superglue-multirc", "superglue-record",
+                    "superglue-wsc.fixed", "mrpc", "cola", "sst2", "qnli", "rte",  "mnli", "qqp", "stsb"],  # job_name
+                    ["superglue-boolq", "superglue-cb", "superglue-copa", "superglue-wic", "superglue-multirc", "superglue-record", "superglue-wsc.fixed", "mrpc", "cola", "sst2", "qnli", "rte",  "mnli", "qqp", "stsb"],  # task_name
+                    ["superglue-boolq", "superglue-cb", "superglue-copa", "superglue-wic", "superglue-multirc", "superglue-record", "superglue-wsc.fixed", "mrpc", "cola", "sst2", "qnli", "rte",  "mnli", "qqp", "stsb"],  # eval_dataset_name
+                    ["superglue-boolq", "superglue-cb", "superglue-copa", "superglue-wic", "superglue-multirc", "superglue-record", "superglue-wsc.fixed", "mrpc", "cola", "sst2", "qnli", "rte", "mnli", "qqp", "stsb"],  # test_dataset_name
+                    [ 20,  20,  40,  20,   3,   3,  20,  20,  20,   3,   3,  20,   3,   3,  20],  # num_train_epochs
+                    [256, 256, 256, 256, 256, 512, 256, 128, 128, 128, 128, 128, 128, 128, 128],  # max_source_length
+                    [ 32,  32,  32,  32,  32,  16,  32] + [32] * 8,  # per_device_train_batch_size
+                    [ 32,  32,  32,  32,  32,  16,  32] + [32] * 8,  # per_device_eval_batch_size
+                    [0] *7 +[0] *8,  # warmup_steps
+                    [200, 100, 50, 100, 200, 200, 100, 200, 100, 200, 200, 100, 200, 200, 100],  # save_steps
+                    [200, 100, 50, 100, 200, 200, 100, 200, 100, 200, 200, 100, 200, 200, 100],  # eval_steps
+                ),  # each zipped row describes one task; __main__ turns it into one JSON file
+                "do_train": True,
+                "do_eval": True,
+                "do_test": True,
+
+                "model_name_or_path": f"{PATHBASE}t5-small",
+                "tokenizer_name": f"{PATHBASE}t5-small",
+                "save_total_limit": 1,
+                # For glue datasets.
+                "split_validation_test": True,
+                "seed": 42,
+                "dataset_config_name": ["en"],
+                "eval_dataset_config_name": ["en"],
+                "test_dataset_config_name": ["en"],
+                # other configurations.
+                "predict_with_generate": True,
+                # To evaluate during training.
+                "load_best_model_at_end": True,
+                "metric_for_best_model": "average_metrics",
+                "greater_is_better": True,
+                "evaluation_strategy": "steps",
+                "overwrite_output_dir": True,
+                "push_to_hub": False,
+                "push_to_delta_center": True,
+                "save_strategy": "steps"
+            }
+
+AllConfigs['prefix_t5-small'] = copy.deepcopy(BaseConfigs['t5-small'])
+AllConfigs['prefix_t5-small'].update(
+                                delta_type="prefix",
+                                learning_rate=3e-4,
+                                unfrozen_modules=[
+                                    "deltas",
+                                ],
+                                output_dir="outputs/prefix/t5-small/",  # prefix; __main__ appends the job name
+                            )
+
+
+
+
+#### ROBERTA######
+BaseConfigs['roberta-base'] = {  # settings shared by every roberta-base entry of AllConfigs
+                ("job_name", "task_name", "eval_dataset_name", "test_dataset_name", "num_train_epochs",
+                "max_source_length",
+                "per_device_train_batch_size", "per_device_eval_batch_size", "warmup_steps","save_steps", "eval_steps"): zip(  # one list per field named in this tuple key, in the same order
+                    ["superglue-boolq", "superglue-cb", "superglue-copa", "superglue-wic", "superglue-multirc", "superglue-record",
+                    "superglue-wsc.fixed", "mrpc", "cola", "sst2", "qnli", "rte",  "mnli", "qqp", "stsb"],  # job_name
+                    ["superglue-boolq", "superglue-cb", "superglue-copa", "superglue-wic", "superglue-multirc", "superglue-record", "superglue-wsc.fixed", "mrpc", "cola", "sst2", "qnli", "rte",  "mnli", "qqp", "stsb"],  # task_name
+                    ["superglue-boolq", "superglue-cb", "superglue-copa", "superglue-wic", "superglue-multirc", "superglue-record", "superglue-wsc.fixed", "mrpc", "cola", "sst2", "qnli", "rte",  "mnli", "qqp", "stsb"],  # eval_dataset_name
+                    ["superglue-boolq", "superglue-cb", "superglue-copa", "superglue-wic", "superglue-multirc", "superglue-record", "superglue-wsc.fixed", "mrpc", "cola", "sst2", "qnli", "rte", "mnli", "qqp", "stsb"],  # test_dataset_name
+                    [ 20,  20,  40,  20,   3,   3,  20,  20,  20,   3,   3,  20,   3,   3,  20],  # num_train_epochs
+                    [256, 256, 256, 256, 256, 512, 256, 128, 128, 128, 128, 128, 128, 128, 128],  # max_source_length
+                    [ 32,  32,  32,  32,  32,  16,  32] + [32] * 8,  # per_device_train_batch_size
+                    [ 32,  32,  32,  32,  32,  16,  32] + [32] * 8,  # per_device_eval_batch_size
+                    [0] *7 +[0] *8,  # warmup_steps
+                    [200, 100, 50, 100, 200, 200, 100, 200, 100, 200, 200, 100, 200, 200, 100],  # save_steps
+                    [200, 100, 50, 100, 200, 200, 100, 200, 100, 200, 200, 100, 200, 200, 100],  # eval_steps
+                ),  # each zipped row describes one task; __main__ turns it into one JSON file
+                "do_train": True,
+                "do_eval": True,
+                "do_test": True,
+
+                "model_name_or_path": f"{PATHBASE}roberta-base",
+                "tokenizer_name": f"{PATHBASE}roberta-base",
+                "save_total_limit": 1,
+                # For glue datasets.
+                "is_seq2seq": False,
+                "split_validation_test": True,
+                "seed": 42,
+                "dataset_config_name": ["en"],
+                "eval_dataset_config_name": ["en"],
+                "test_dataset_config_name": ["en"],
+                # other configurations.
+                "predict_with_generate": False,
+                # To evaluate during training.
+                "load_best_model_at_end": True,
+                "metric_for_best_model": "average_metrics",
+                "greater_is_better": True,
+                "evaluation_strategy": "steps",
+                "overwrite_output_dir": True,
+                "push_to_hub": True,
+                "save_strategy": "steps"
+            }
+
+
+
+AllConfigs['bitfit_roberta-base'] = copy.deepcopy(BaseConfigs['roberta-base'])
+AllConfigs['bitfit_roberta-base'].update(
+                delta_type="bitfit",
+                learning_rate=1e-3,
+                output_dir="outputs/bitfit/roberta-base/",  # prefix; __main__ appends the job name
+            )
+
+AllConfigs['none_roberta-base'] = copy.deepcopy(BaseConfigs['roberta-base'])
+AllConfigs['none_roberta-base'].update(
+                delta_type="none",
+                learning_rate=1e-5,
+                output_dir="outputs/none/roberta-base/",
+            )
+
+
+AllConfigs['lora_roberta-base'] = copy.deepcopy(BaseConfigs['roberta-base'])
+AllConfigs['lora_roberta-base'].update(
+                delta_type="lora",
+                learning_rate=1e-3,
+                output_dir="outputs/lora/roberta-base/",
+            )
+
+AllConfigs['adapter_roberta-base'] = copy.deepcopy(BaseConfigs['roberta-base'])
+AllConfigs['adapter_roberta-base'].update(
+                delta_type="adapter",
+                learning_rate=1e-3,
+                output_dir="outputs/adapter/roberta-base/",
+            )
+
+AllConfigs['low_rank_adapter_roberta-base'] = copy.deepcopy(BaseConfigs['roberta-base'])
+AllConfigs['low_rank_adapter_roberta-base'].update(
+                delta_type="low_rank_adapter",
+                learning_rate=1e-3,
+                output_dir="outputs/low_rank_adapter/roberta-base/",
+            )
+
+#### BERT######
+BaseConfigs['bert-base-cased'] = {  # settings shared by every bert-base-cased entry of AllConfigs
+                ("job_name", "task_name", "eval_dataset_name", "test_dataset_name", "num_train_epochs",
+                "max_source_length",
+                "per_device_train_batch_size", "per_device_eval_batch_size", "warmup_steps","save_steps", "eval_steps"): zip(  # one list per field named in this tuple key, in the same order
+                    ["superglue-boolq", "superglue-cb", "superglue-copa", "superglue-wic", "superglue-multirc", "superglue-record",
+                    "superglue-wsc.fixed", "mrpc", "cola", "sst2", "qnli", "rte",  "mnli", "qqp", "stsb"],  # job_name
+                    ["superglue-boolq", "superglue-cb", "superglue-copa", "superglue-wic", "superglue-multirc", "superglue-record", "superglue-wsc.fixed", "mrpc", "cola", "sst2", "qnli", "rte",  "mnli", "qqp", "stsb"],  # task_name
+                    ["superglue-boolq", "superglue-cb", "superglue-copa", "superglue-wic", "superglue-multirc", "superglue-record", "superglue-wsc.fixed", "mrpc", "cola", "sst2", "qnli", "rte",  "mnli", "qqp", "stsb"],  # eval_dataset_name
+                    ["superglue-boolq", "superglue-cb", "superglue-copa", "superglue-wic", "superglue-multirc", "superglue-record", "superglue-wsc.fixed", "mrpc", "cola", "sst2", "qnli", "rte", "mnli", "qqp", "stsb"],  # test_dataset_name
+                    [ 20,  20,  40,  20,   3,   3,  20,  20,  20,   3,   3,  20,   3,   3,  20],  # num_train_epochs
+                    [256, 256, 256, 256, 256, 512, 256, 128, 128, 128, 128, 128, 128, 128, 128],  # max_source_length
+                    [ 32,  32,  32,  32,  32,  16,  32] + [32] * 8,  # per_device_train_batch_size
+                    [ 32,  32,  32,  32,  32,  16,  32] + [32] * 8,  # per_device_eval_batch_size
+                    [0] *7 +[0] *8,  # warmup_steps
+                    [200, 100, 50, 100, 200, 200, 100, 200, 100, 200, 200, 100, 200, 200, 100],  # save_steps
+                    [200, 100, 50, 100, 200, 200, 100, 200, 100, 200, 200, 100, 200, 200, 100],  # eval_steps
+                ),  # each zipped row describes one task; __main__ turns it into one JSON file
+                "do_train": True,
+                "do_eval": True,
+                "do_test": True,
+
+                "model_name_or_path": f"{PATHBASE}bert-base-cased",
+                "tokenizer_name": f"{PATHBASE}bert-base-cased",
+                "save_total_limit": 1,
+                # For glue datasets.
+                "is_seq2seq": False,
+                "split_validation_test": True,
+                "seed": 42,
+                "dataset_config_name": ["en"],
+                "eval_dataset_config_name": ["en"],
+                "test_dataset_config_name": ["en"],
+                # other configurations.
+                "predict_with_generate": False,
+                # To evaluate during training.
+                "load_best_model_at_end": True,
+                "metric_for_best_model": "average_metrics",
+                "greater_is_better": True,
+                "evaluation_strategy": "steps",
+                "overwrite_output_dir": True,
+                "push_to_hub": True,
+                "save_strategy": "steps"
+            }
+
+AllConfigs['prefix_bert-base-cased'] = copy.deepcopy(BaseConfigs['bert-base-cased'])
+AllConfigs['prefix_bert-base-cased'].update(
+                                delta_type="prefix",
+                                learning_rate=3e-4,
+                                unfrozen_modules=[
+                                    "deltas",
+                                ],
+                                output_dir="outputs/prefix/bert-base-cased/",  # prefix; __main__ appends the job name
+                            )
+
+AllConfigs['soft_prompt_bert-base-cased'] = copy.deepcopy(BaseConfigs['bert-base-cased'])
+AllConfigs['soft_prompt_bert-base-cased'].update(
+                                delta_type="soft_prompt",
+                                learning_rate=3e-4,
+                                unfrozen_modules=[
+                                    "deltas",
+                                ],
+                                output_dir="outputs/soft_prompt/bert-base-cased/",
+                            )
+
+if __name__ == "__main__":
+    # Expand one AllConfigs entry into one JSON file per task under configs/<job>/.
+    import argparse
+    import json
+    import os
+    parser = argparse.ArgumentParser("Parser to generate configuration")
+    # Unknown or missing --job now yields a usage error instead of a KeyError.
+    parser.add_argument("--job", type=str, required=True, choices=sorted(AllConfigs))
+    args = parser.parse_args()
+    config = AllConfigs[args.job]
+
+    # Tuple keys list per-task field names; their value iterates over rows of values.
+    per_task_keys = [key for key in config if isinstance(key, tuple)]
+    all_config_jsons = {}
+    for key_tuple in per_task_keys:
+        for row in config[key_tuple]:
+            # The first element of a row is the job name; it is used as the file name.
+            job_name = row[0]
+            all_config_jsons[job_name] = {
+                name: value for name, value in zip(key_tuple, row) if name != 'job_name'
+            }
+    # Every non-tuple key is shared by all tasks of the job.
+    for key, value in config.items():
+        if isinstance(key, tuple):
+            continue
+        for job_name, job_config in all_config_jsons.items():
+            # "output_dir" is a prefix: the job name is appended for each task.
+            job_config[key] = value + job_name if key == "output_dir" else value
+
+    # makedirs also creates a missing "configs/" parent, where os.mkdir would fail.
+    out_dir = f"configs/{args.job}/"
+    os.makedirs(out_dir, exist_ok=True)
+
+    # Write each task's settings as an indented, key-sorted JSON document.
+    for job_name, job_config in all_config_jsons.items():
+        with open(f"{out_dir}{job_name}.json", 'w') as fout:
+            json.dump(job_config, fout, indent=4, sort_keys=True)
+
+
+
diff --git a/examples/examples_prompt/run.py b/examples/examples_prompt/run.py
deleted file mode 100644
index 962f608..0000000
--- a/examples/examples_prompt/run.py
+++ /dev/null
@@ -1,482 +0,0 @@
-# coding=utf-8
-# Copyright The HuggingFace Team and The HuggingFace Inc. team. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-"""
-Fine-tuning the library models for sequence to sequence.
-"""
-# You can also adapt this script on your own sequence to sequence task. Pointers for this are left as comments.
-import functools
-import logging
-from opendelta.utils.delta_center import create_hub_repo_name
-import torch
-import os
-os.environ['MKL_THREADING_LAYER'] = 'GNU'
-os.environ['MKL_SERVICE_FORCE_INTEL'] = '1'
-import sys
-import subprocess
-from typing import Optional, List
-
-from datasets import load_dataset, load_metric, concatenate_datasets
-import transformers
-from transformers import (
-    AutoConfig,
-    AutoModelForSeq2SeqLM,
-    AutoTokenizer,
-    HfArgumentParser,
-    MBartTokenizer,
-    default_data_collator,
-    set_seed,
-)
-from transformers.trainer_utils import is_main_process, get_last_checkpoint
-# from ..seq2seq.utils import get_adapter_config
-from examples_prompt.data_processors import AutoTask, TaskDataCollatorForSeq2Seq, AutoPostProcessor
-from examples_prompt.seq2seq_trainer import Seq2SeqTrainer
-# from training_args import AdapterTrainingArguments
-from examples_prompt.trainers.trainer_utils import save_training_config
-from dataclasses import dataclass, field
-
-from transformers.models.t5.modeling_t5 import T5Config, T5ForConditionalGeneration
-from examples_prompt.utils.args import ModelArguments
-from examples_prompt.trainers.trainer_args import TrainingArguments, DataTrainingArguments
-from transformers.trainer import Trainer
-from examples_prompt.metrics.metrics import transform_for_generation
-import json
-logger = logging.getLogger(__name__)
-
-
-
-TASK_TO_METRICS = {"mrpc": ["accuracy", "f1"],
-                  "cola": ['matthews_correlation'],
-                  "stsb": ['pearson', 'spearmanr'],
-                  'sst2': ['accuracy'],
-                  "mnli": ["accuracy"],
-                  "mnli_mismatched": ["accuracy"],
-                  "mnli_matched": ["accuracy"],
-                  "qnli": ["accuracy"],
-                  "rte": ["accuracy"],
-                  "wnli": ["accuracy"],
-                  "qqp": ["accuracy", "f1"],
-                  "superglue-boolq": ["accuracy"],
-                  "superglue-rte": ["accuracy"],
-                  "superglue-cb": ["f1_multiclass", "accuracy"],
-                  "superglue-copa": ["accuracy"],
-                  "superglue-multirc": ["f1", "em"],
-                  "superglue-wic": ["accuracy"],
-                  "superglue-wsc.fixed": ["accuracy"],
-                  "superglue-record": ["f1", "em"]
-         }
-
-
-class RemainArgHfArgumentParser(HfArgumentParser):
-    def parse_json_file(self, json_file: str, return_remaining_args=True ):
-        """
-        Alternative helper method that does not use `argparse` at all, instead loading a json file and populating the
-        dataclass types.
-        """
-        import argparse
-        import json
-        from pathlib import Path
-        import dataclasses
-
-        data = json.loads(Path(json_file).read_text())
-        outputs = []
-        for dtype in self.dataclass_types:
-            keys = {f.name for f in dataclasses.fields(dtype) if f.init}
-            inputs = {k: data.pop(k) for k in list(data.keys()) if k in keys}
-            obj = dtype(**inputs)
-            outputs.append(obj)
-
-        remain_args = argparse.ArgumentParser()
-        remain_args.__dict__.update(data)
-        if return_remaining_args:
-            return (*outputs, remain_args)
-        else:
-            return (*outputs,)
-
-
-
-def main():
-
-    # See all possible arguments in src/transformers/training_args.py
-    # or by passing the --help flag to this script.
-    # We now keep distinct sets of args, for a cleaner separation of concerns.
-    parser = RemainArgHfArgumentParser((ModelArguments, DataTrainingArguments, TrainingArguments))
-    if len(sys.argv) == 2 and sys.argv[1].endswith(".json"):
-        # If we pass only one argument to the script and it's the path to a json file,
-        # let's parse it to get our arguments.
-        model_args, data_args, training_args, delta_args = parser.parse_json_file(json_file=os.path.abspath(sys.argv[1]))
-    else:
-        model_args, data_args, training_args, delta_args = parser.parse_args_into_dataclasses(return_remaining_strings=True)
-
-
-    # Detecting last checkpoint.
-    last_checkpoint = None
-    if os.path.isdir(training_args.output_dir) and training_args.do_train and not training_args.overwrite_output_dir:
-        last_checkpoint = get_last_checkpoint(training_args.output_dir)
-        print("#### last_checkpoint ", last_checkpoint)
-        if last_checkpoint is None and len(os.listdir(training_args.output_dir)) > 0:
-            '''
-            raise ValueError(
-                f"Output directory ({training_args.output_dir}) already exists and is not empty. "
-                "Use --overwrite_output_dir to overcome."
-            )
-            '''
-            pass
-        elif last_checkpoint is not None:
-            logger.info(
-                f"Checkpoint detected, resuming training at {last_checkpoint}. To avoid this behavior, change "
-                "the `--output_dir` or add `--overwrite_output_dir` to train from scratch."
-            )
-
-    # Setup logging
-    logging.basicConfig(
-        format="%(asctime)s - %(levelname)s - %(name)s -   %(message)s",
-        datefmt="%m/%d/%Y %H:%M:%S",
-        handlers=[logging.StreamHandler(sys.stdout)],
-    )
-    logger.setLevel(logging.INFO if is_main_process(training_args.local_rank) else logging.WARN)
-
-    # Log on each process the small summary:
-    logger.warning(
-        f"Process rank: {training_args.local_rank}, device: {training_args.device}, n_gpu: {training_args.n_gpu}"
-        + f"distributed training: {bool(training_args.local_rank != -1)}, 16-bits training: {training_args.fp16}"
-    )
-    # Set the verbosity to info of the Transformers logger (on main process only):
-    if is_main_process(training_args.local_rank):
-        transformers.utils.logging.set_verbosity_info()
-    logger.info("Training/evaluation parameters %s", training_args)
-
-    # Set seed before initializing model.
-    set_seed(training_args.seed)
-
-    # Get the datasets: you can either provide your own CSV/JSON training and evaluation files (see below)
-    # or just provide the name of one of the public datasets available on the hub at https://huggingface.co/datasets/
-    # (the dataset will be downloaded automatically from the datasets Hub).
-    #
-    # For CSV/JSON files in the summarization task, this script will use the first column for the full texts and the
-    # second column for the summaries (unless you specify column names for this with the `text_column` and
-    # `summary_column` arguments).
-    # For translation, only JSON files are supported, with one field named "translation" containing two keys for the
-    # source and target languages (unless you adapt what follows).
-    #
-    # In distributed training, the load_dataset function guarantee that only one local process can concurrently
-    # download the dataset.
-    # See more about loading any type of standard or custom dataset (from files, python dict, pandas DataFrame, etc) at
-    # https://huggingface.co/docs/datasets/loading_datasets.html.
-
-    # Load pretrained model and tokenizer
-    #
-    # Distributed training:
-    # The .from_pretrained methods guarantee that only one local process can concurrently
-    # download model & vocab.
-    config = AutoConfig.from_pretrained(
-        model_args.config_name if model_args.config_name else model_args.model_name_or_path,
-        cache_dir=model_args.cache_dir,
-        revision=model_args.model_revision,
-        use_auth_token=True if model_args.use_auth_token else None,
-    )
-    config.dropout_rate = 0.0
-    tokenizer = AutoTokenizer.from_pretrained(
-        model_args.tokenizer_name if model_args.tokenizer_name else model_args.model_name_or_path,
-        cache_dir=model_args.cache_dir,
-        use_fast=model_args.use_fast_tokenizer,
-        revision=model_args.model_revision,
-        use_auth_token=True if model_args.use_auth_token else None,
-    )
-    model = AutoModelForSeq2SeqLM.from_pretrained(
-        model_args.model_name_or_path,
-        from_tf=bool(".ckpt" in model_args.model_name_or_path),
-        config=config,
-        cache_dir=model_args.cache_dir,
-        revision=model_args.model_revision,
-        use_auth_token=True if model_args.use_auth_token else None,
-    )
-    model.resize_token_embeddings(len(tokenizer))
-
-
-    if delta_args.delta_type.lower() != "none":
-        from opendelta import AutoDeltaConfig,AutoDeltaModel
-        delta_config = AutoDeltaConfig.from_dict(vars(delta_args))
-        delta_model = AutoDeltaModel.from_config(delta_config, backbone_model=model)
-        delta_model.freeze_module(set_state_dict = True)
-        delta_model.log(delta_ratio=True, trainable_ratio=True, visualization=True)
-
-
-    # model parallelize
-    if hasattr(training_args, "model_parallel") and training_args.model_parallel:
-        logger.info('parallelize model!')
-        model.parallelize()
-
-    data_args.dataset_name = [data_args.task_name]
-    data_args.eval_dataset_name = [data_args.eval_dataset_name]
-    data_args.test_dataset_name = [data_args.test_dataset_name]
-    data_args.dataset_config_name = [data_args.dataset_config_name]
-    data_args.eval_dataset_config_name = [data_args.eval_dataset_config_name]
-    data_args.test_dataset_config_name = [data_args.test_dataset_config_name]
-    assert len(data_args.dataset_name) == len(data_args.dataset_config_name)
-    if data_args.eval_dataset_name is not None:
-        assert len(data_args.eval_dataset_name) == len(data_args.eval_dataset_config_name)
-    if data_args.test_dataset_name is not None:
-        assert len(data_args.test_dataset_name) == len(data_args.test_dataset_config_name)
-
-    # Temporarily set max_target_length for training.
-    #max_target_length = data_args.max_target_length
-    padding = "max_length" if data_args.pad_to_max_length else False
-
-    def preprocess_function(examples, max_target_length):
-        # max_target_length += 1
-        model_inputs = tokenizer([s+"<extra_id_0>" for s in examples['source']], max_length=data_args.max_source_length,
-                                 padding=padding, truncation=True)
-        # # Setup the tokenizer for targets
-        with tokenizer.as_target_tokenizer():
-            labels = tokenizer([t for t in examples['target']], max_length=max_target_length, padding=padding, truncation=True)
-        # model_inputs = tokenizer([s for s in examples['source']], max_length=data_args.max_source_length,
-        #                          padding=padding, truncation=True)
-        # Setup the tokenizer for targets
-        # with tokenizer.as_target_tokenizer():
-        #     labels = tokenizer([t for t in examples['target']], max_length=max_target_length, padding=padding, truncation=True)
-        # If we are padding here, replace all tokenizer.pad_token_id in the labels by -100 when we want to ignore
-        # padding in the loss.
-        if padding == "max_length" and data_args.ignore_pad_token_for_loss:
-            labels["input_ids"] = [
-                [(l if l != tokenizer.pad_token_id else -100) for l in label] for label in labels["input_ids"]
-            ]
-        model_inputs["labels"] = labels["input_ids"]
-        model_inputs["extra_fields"] = examples['extra_fields']
-        return model_inputs
-
-    column_names = ['source', 'target', 'label', 'extra_fields']
-    performance_metrics = {}
-    if training_args.do_train:
-        train_datasets = [AutoTask.get(dataset_name,
-                                       dataset_config_name,
-                                       seed=data_args.data_seed).get(
-            split="train",
-            split_validation_test=training_args.split_validation_test,
-            add_prefix=True,
-            n_obs=data_args.max_train_samples)
-            for dataset_name, dataset_config_name\
-            in zip(data_args.dataset_name, data_args.dataset_config_name)]
-        max_target_lengths = [AutoTask.get(dataset_name, dataset_config_name).get_max_target_length(\
-            tokenizer=tokenizer, default_max_length=data_args.max_target_length)\
-            for dataset_name, dataset_config_name in zip(data_args.dataset_name, data_args.dataset_config_name)]
-        for i, train_dataset in enumerate(train_datasets):
-            train_datasets[i] = train_datasets[i].map(
-                functools.partial(preprocess_function, max_target_length=max_target_lengths[i]),
-                batched=True,
-                num_proc=data_args.preprocessing_num_workers,
-                remove_columns=column_names, # if train_dataset != "superglue-record" else column_names+["answers"],
-                load_from_cache_file=not data_args.overwrite_cache,
-            )
-        train_dataset = concatenate_datasets(train_datasets)
-        print(f"Train dataset size {len(train_dataset)}")
-
-    if training_args.do_eval:
-        eval_datasets = {eval_dataset: AutoTask.get(eval_dataset, eval_dataset_config,
-            seed=data_args.data_seed).get(
-            split="validation",
-            split_validation_test=training_args.split_validation_test,
-            add_prefix=True,
-            n_obs=data_args.max_val_samples)
-            for eval_dataset, eval_dataset_config in zip(data_args.eval_dataset_name, data_args.eval_dataset_config_name)}
-        max_target_lengths = [AutoTask.get(dataset_name, dataset_config_name).get_max_target_length( \
-            tokenizer=tokenizer, default_max_length=data_args.max_target_length) \
-            for dataset_name, dataset_config_name in zip(data_args.eval_dataset_name, data_args.eval_dataset_config_name)]
-        for k, name in enumerate(eval_datasets):
-            eval_datasets[name] = eval_datasets[name].map(
-                    functools.partial(preprocess_function, max_target_length=max_target_lengths[k]),
-                    batched=True,
-                    num_proc=data_args.preprocessing_num_workers,
-                    remove_columns=column_names, # if name != "superglue-record" else column_names+["answers"],
-                    load_from_cache_file=not data_args.overwrite_cache,
-            )
-
-    if training_args.do_test:
-        test_datasets = {test_dataset: AutoTask.get(test_dataset, test_dataset_config,
-            seed=data_args.data_seed).get(
-            split="test",
-            split_validation_test=training_args.split_validation_test,
-            add_prefix=True,
-            n_obs=data_args.max_test_samples)
-            for test_dataset, test_dataset_config in zip(data_args.test_dataset_name, data_args.test_dataset_config_name)}
-        max_target_lengths = [AutoTask.get(dataset_name, dataset_config_name).get_max_target_length( \
-            tokenizer=tokenizer, default_max_length=data_args.max_target_length) \
-            for dataset_name, dataset_config_name in zip(data_args.test_dataset_name, data_args.test_dataset_config_name)]
-        for k, name in enumerate(test_datasets):
-            test_datasets[name] = test_datasets[name].map(
-                    functools.partial(preprocess_function, max_target_length=max_target_lengths[k]),
-                    batched=True,
-                    num_proc=data_args.preprocessing_num_workers,
-                    remove_columns=column_names,
-                    load_from_cache_file=not data_args.overwrite_cache,
-            )
-
-    # Data collator
-    label_pad_token_id = -100 if data_args.ignore_pad_token_for_loss else tokenizer.pad_token_id
-    if data_args.pad_to_max_length:
-        data_collator = default_data_collator
-    else:
-        data_collator = TaskDataCollatorForSeq2Seq(
-            tokenizer,
-            label_pad_token_id=label_pad_token_id,
-            pad_to_multiple_of=8 if training_args.fp16 else None,
-        )
-
-
-    # Metric, we assume we have only one training task.
-    eval_metrics = [AutoTask.get(dataset_name, dataset_config_name).metric\
-        for dataset_name, dataset_config_name in zip(data_args.dataset_name, data_args.dataset_config_name)][0]
-
-    # Extracts the extra information needed to evaluate on each dataset.
-    # These information are only used in the compute_metrics.
-    # We will assume that the test/eval dataloader does not change the order of
-    # the data.
-    data_info = {"eval": eval_datasets[data_args.eval_dataset_name[0]]['extra_fields'],
-                 "test": test_datasets[data_args.test_dataset_name[0]]['extra_fields'],
-                 "train": train_dataset['extra_fields']}
-    def compute_metrics(eval_preds):
-        preds, labels, data_info = eval_preds
-        post_processor = AutoPostProcessor.get(data_args.dataset_name[0], tokenizer,
-                                               data_args.ignore_pad_token_for_loss)
-        decoded_preds, decoded_labels = post_processor.process(preds, labels, data_info)
-        decoded_preds, decoded_labels  = transform_for_generation(decoded_preds, decoded_labels)
-        result = {}
-        for metric in eval_metrics:
-            result.update(metric(decoded_preds, decoded_labels))
-        return result
-
-
-    # Initialize our Trainer
-    if training_args.is_seq2seq == True:
-        trainer = Seq2SeqTrainer(
-            model=model,
-            args=training_args,
-            delta_args=delta_args,
-            train_dataset=train_dataset if training_args.do_train else None,
-            eval_dataset=list(eval_datasets.values())[0] if training_args.do_eval else None,
-            data_info = data_info,
-            tokenizer=tokenizer,
-            data_collator=data_collator,
-            compute_metrics=compute_metrics if training_args.predict_with_generate else None,
-            evaluation_metrics = TASK_TO_METRICS[data_args.dataset_name[0]],
-        )
-    else:
-        trainer = Trainer(
-        model=model,
-        args=training_args,
-        train_dataset=train_dataset if training_args.do_train else None,
-        eval_dataset=eval_dataset if training_args.do_eval else None,
-        compute_metrics=compute_metrics,
-        tokenizer=tokenizer,
-        data_collator=data_collator,
-    )
-
-
-    # Saves training config.
-    if trainer.is_world_process_zero():
-       os.makedirs(training_args.output_dir, exist_ok=True)
-       save_training_config(sys.argv[1], training_args.output_dir)
-
-    # Training
-    if training_args.do_train:
-        checkpoint = None
-        if training_args.resume_from_checkpoint is not None:
-            checkpoint = training_args.resume_from_checkpoint
-        elif last_checkpoint is not None:
-            checkpoint = last_checkpoint
-
-        if training_args.compute_time:
-            torch.cuda.synchronize()  # wait for move to complete
-            start = torch.cuda.Event(enable_timing=True)
-            end = torch.cuda.Event(enable_timing=True)
-            start.record()
-
-        train_result = trainer.train(resume_from_checkpoint=checkpoint)
-
-        if training_args.compute_time:
-            end.record()
-            torch.cuda.synchronize()  # wait for all_reduce to complete
-            total_time = start.elapsed_time(end)/(1000*60)
-            performance_metrics.update({"total_time in minutes ": total_time})
-
-        trainer.save_model()  # Saves the tokenizer too for easy upload
-        train_metrics = train_result.metrics
-        max_train_samples = (
-            data_args.max_train_samples if data_args.max_train_samples is not None else len(train_dataset)
-        )
-        train_metrics["train_samples"] = min(max_train_samples, len(train_dataset))
-        trainer.log_metrics("train", train_metrics)
-        trainer.save_metrics("train", train_metrics)
-        trainer.save_state()
-
-    if torch.cuda.is_available() and training_args.compute_memory:
-        peak_memory = (torch.cuda.max_memory_allocated() / 1024 ** 2)/1000
-        print(
-            "Memory utilization",
-            peak_memory,
-            "GB"
-        )
-        performance_metrics.update({"peak_memory": peak_memory})
-    if training_args.compute_memory or training_args.compute_time:
-        print(performance_metrics)
-        trainer.save_metrics("performance", performance_metrics)
-
-    # Evaluation
-    results = {}
-    if training_args.do_eval:
-        logger.info("*** Evaluate ***")
-        for task, eval_dataset in eval_datasets.items():
-            metrics = trainer.evaluate(eval_dataset=eval_dataset,
-               max_length=data_args.val_max_target_length, num_beams=data_args.num_beams,
-            )
-            trainer.log_metrics("eval", metrics)
-            trainer.save_metrics("eval", metrics)
-        results['evaluate'] = metrics
-
-    # Test
-    if training_args.do_test:
-        logger.info("*** Test ***")
-        for task, test_dataset in test_datasets.items():
-            metrics = trainer.evaluate(eval_dataset=test_dataset,
-              max_length=data_args.test_max_target_length, num_beams=data_args.num_beams,
-              metric_key_prefix="test"
-            )
-            trainer.log_metrics("test", metrics)
-            trainer.save_metrics("test", metrics)
-        results['test'] = metrics
-
-    repo_name = create_hub_repo_name(root="DeltaHub",
-                         dataset=data_args.task_name,
-                         delta_type = delta_args.delta_type,
-                         model_name_or_path= model_args.model_name_or_path)
-    results['repo_name'] = repo_name
-    if delta_args.delta_type.lower() != "none":
-        if training_args.push_to_hub: # TODO add description here
-            delta_model.save_finetuned(push_to_hub=True, save_directory=repo_name, use_auth_token=True)
-            # trainer.push_to_hub(**kwargs)
-        else:
-            delta_model.save_finetuned(push_to_hub=False, save_directory=repo_name, use_auth_token=True)
-
-    with open(f"{training_args.output_dir}/results.json", 'w') as fout:
-        string = json.dumps(results, indent=4,sort_keys=True)
-        fout.write(string+"\n")
-
-    return results
-
-
-
-
-if __name__ == "__main__":
-    result = main()
-
diff --git a/examples/examples_prompt/run.sh b/examples/examples_prompt/run.sh
deleted file mode 100644
index 9b2781e..0000000
--- a/examples/examples_prompt/run.sh
+++ /dev/null
@@ -1,7 +0,0 @@
-files=(cola mnli mrpc qnli qqp rte sst2 stsb superglue-boolq superglue-cb superglue-copa superglue-multirc superglue-record superglue-wic superglue-wsc.fixed)
-for ((i=$1; i<=$2; i++))
-do
-    dataset=${files[i]}
-    echo "id$i:$dataset"
-    TOKENIZERS_PARALLELISM=false python run.py configs/$3/$dataset.json
-done 
\ No newline at end of file
diff --git a/examples/examples_prompt/run_mlm.sh b/examples/examples_prompt/run_mlm.sh
deleted file mode 100644
index 0836f20..0000000
--- a/examples/examples_prompt/run_mlm.sh
+++ /dev/null
@@ -1,11 +0,0 @@
-
-python configs/config_gen.py --job $3
-echo "Regenerate config"
-
-files=(cola mnli mrpc qnli qqp rte sst2 stsb superglue-boolq superglue-cb superglue-copa superglue-multirc superglue-record superglue-wic superglue-wsc.fixed)
-for ((i=$1; i<=$2; i++))
-do
-    dataset=${files[i]}
-    echo "id$i:$dataset"
-    TOKENIZERS_PARALLELISM=false python run_mlm.py configs/$3/$dataset.json
-done
\ No newline at end of file
diff --git a/examples/examples_seq2seq/configs/config_gen_bs.py b/examples/examples_seq2seq/configs/config_gen_bs.py
new file mode 100644
index 0000000..4cf3c8e
--- /dev/null
+++ b/examples/examples_seq2seq/configs/config_gen_bs.py
@@ -0,0 +1,411 @@
+import collections 
+import copy
+
+BS = 1
+AllConfigs = {}
+
+BaseConfigs = {}
+BaseConfigs['t5-base'] = {
+                ("job_name", "task_name", "eval_dataset_name", "test_dataset_name", "num_train_epochs", 
+                "max_source_length",
+                "per_device_train_batch_size", "per_device_eval_batch_size", "warmup_steps","save_steps", "eval_steps"): zip(
+                    ["superglue-boolq", "superglue-cb", "superglue-copa", "superglue-wic", "superglue-multirc", "superglue-record",
+                    "superglue-wsc.fixed", "mrpc", "cola", "sst2", "qnli", "rte",  "mnli", "qqp", "stsb"], 
+                    ["superglue-boolq", "superglue-cb", "superglue-copa", "superglue-wic", "superglue-multirc", "superglue-record", "superglue-wsc.fixed", "mrpc", "cola", "sst2", "qnli", "rte",  "mnli", "qqp", "stsb"], 
+                    ["superglue-boolq", "superglue-cb", "superglue-copa", "superglue-wic", "superglue-multirc", "superglue-record", "superglue-wsc.fixed", "mrpc", "cola", "sst2", "qnli", "rte",  "mnli", "qqp", "stsb"], 
+                    ["superglue-boolq", "superglue-cb", "superglue-copa", "superglue-wic", "superglue-multirc", "superglue-record", "superglue-wsc.fixed", "mrpc", "cola", "sst2", "qnli", "rte", "mnli", "qqp", "stsb"],
+                    [ 20,  20,  40,  20,   3,   3,  20,  20,  20,   3,   3,  20,   3,   3,  20],
+                    [256, 256, 256, 256, 256, 512, 256, 128, 128, 128, 128, 128, 128, 128, 128],
+                    # Train/eval batch sizes (next two rows): the first 7 tasks keep fixed sizes, the
+                    # last 8 use BS; both rows were previously [ 32,  32,  32,  32,  32,  16,  32] + [32] * 8.
+                    [ 32,  32,  32,  32,  32,  16,  32] + [BS] * 8,
+                    [ 32,  32,  32,  32,  32,  16,  32] + [BS] * 8,
+                    [0] *7 +[0] *8,
+                    [200, 100, 50, 100, 200, 200, 100, 200, 100, 200, 200, 100, 200, 200, 100],
+                    [200, 100, 50, 100, 200, 200, 100, 200, 100, 200, 200, 100, 200, 200, 100],
+                ),
+                "do_train": True,
+                "do_eval": True,
+                "do_test": True,
+                
+                "model_name_or_path": "t5-base",
+                "tokenizer_name": "t5-base",
+                "save_total_limit": 1,
+                # For glue datasets.
+                "split_validation_test": True,
+                "seed": 42,
+                "dataset_config_name": ["en"],
+                "eval_dataset_config_name": ["en"],
+                "test_dataset_config_name": ["en"],
+                # other configurations.
+                "predict_with_generate": True,
+                # To evaluate during training.
+                "load_best_model_at_end": True,
+                "metric_for_best_model": "average_metrics",
+                "greater_is_better": True,
+                "evaluation_strategy": "steps",
+                "overwrite_output_dir": True,
+                "push_to_hub": True,
+                "save_strategy": "steps"
+            }
+
+BaseConfigs['t5-large'] = {
+                ("job_name", "task_name", "eval_dataset_name", "test_dataset_name", "num_train_epochs", 
+                "max_source_length",
+                "per_device_train_batch_size", "per_device_eval_batch_size", "warmup_steps","save_steps", "eval_steps"): zip(
+                    ["superglue-boolq", "superglue-cb", "superglue-copa", "superglue-wic", "superglue-multirc", "superglue-record",
+                    "superglue-wsc.fixed", "mrpc", "cola", "sst2", "qnli", "rte",  "mnli", "qqp", "stsb"], 
+                    ["superglue-boolq", "superglue-cb", "superglue-copa", "superglue-wic", "superglue-multirc", "superglue-record", "superglue-wsc.fixed", "mrpc", "cola", "sst2", "qnli", "rte",  "mnli", "qqp", "stsb"], 
+                    ["superglue-boolq", "superglue-cb", "superglue-copa", "superglue-wic", "superglue-multirc", "superglue-record", "superglue-wsc.fixed", "mrpc", "cola", "sst2", "qnli", "rte",  "mnli", "qqp", "stsb"], 
+                    ["superglue-boolq", "superglue-cb", "superglue-copa", "superglue-wic", "superglue-multirc", "superglue-record", "superglue-wsc.fixed", "mrpc", "cola", "sst2", "qnli", "rte", "mnli", "qqp", "stsb"],
+                    [ 20,  20,  40,  20,   3,   3,  20,  20,  20,   3,   3,  20,   3,   3,  20],
+                    [256, 256, 256, 256, 256, 512, 256, 128, 128, 128, 128, 128, 128, 128, 128],
+                    # Train/eval batch sizes (next two rows): the first 7 tasks keep fixed sizes, the
+                    # last 8 use BS; both rows were previously [ 32,  32,  32,  32,  32,  16,  32] + [32] * 8.
+                    [ 32,  32,  32,  32,  32,  16,  32] + [BS] * 8,
+                    [ 32,  32,  32,  32,  32,  16,  32] + [BS] * 8,
+                    [0] *7 +[0] *8,
+                    [200, 100, 50, 100, 200, 200, 100, 200, 100, 200, 200, 100, 200, 200, 100],
+                    [200, 100, 50, 100, 200, 200, 100, 200, 100, 200, 200, 100, 200, 200, 100],
+                ),
+                "do_train": True,
+                "do_eval": True,
+                "do_test": True,
+                
+                "model_name_or_path": "/home/hushengding/plm_cache/t5-large",
+                "tokenizer_name": "/home/hushengding/plm_cache/t5-large",
+                "save_total_limit": 1,
+                # For glue datasets.
+                "split_validation_test": True,
+                "seed": 42,
+                "dataset_config_name": ["en"],
+                "eval_dataset_config_name": ["en"],
+                "test_dataset_config_name": ["en"],
+                # other configurations.
+                "predict_with_generate": True,
+                # To evaluate during training.
+                "load_best_model_at_end": True,
+                "metric_for_best_model": "average_metrics",
+                "greater_is_better": True,
+                "evaluation_strategy": "steps",
+                "overwrite_output_dir": True,
+                "push_to_hub": True,
+                "save_strategy": "steps"
+            }
+
+BaseConfigs['t5-3b'] = {
+                ("job_name", "task_name", "eval_dataset_name", "test_dataset_name", "num_train_epochs", 
+                "max_source_length",
+                "per_device_train_batch_size", "per_device_eval_batch_size", "warmup_steps","save_steps", "eval_steps"): zip(
+                    ["superglue-boolq", "superglue-cb", "superglue-copa", "superglue-wic", "superglue-multirc", "superglue-record",
+                    "superglue-wsc.fixed", "mrpc", "cola", "sst2", "qnli", "rte",  "mnli", "qqp", "stsb"], 
+                    ["superglue-boolq", "superglue-cb", "superglue-copa", "superglue-wic", "superglue-multirc", "superglue-record", "superglue-wsc.fixed", "mrpc", "cola", "sst2", "qnli", "rte",  "mnli", "qqp", "stsb"], 
+                    ["superglue-boolq", "superglue-cb", "superglue-copa", "superglue-wic", "superglue-multirc", "superglue-record", "superglue-wsc.fixed", "mrpc", "cola", "sst2", "qnli", "rte",  "mnli", "qqp", "stsb"], 
+                    ["superglue-boolq", "superglue-cb", "superglue-copa", "superglue-wic", "superglue-multirc", "superglue-record", "superglue-wsc.fixed", "mrpc", "cola", "sst2", "qnli", "rte", "mnli", "qqp", "stsb"],
+                    [ 20,  20,  40,  20,   3,   3,  20,  20,  20,   3,   3,  20,   3,   3,  20],
+                    [256, 256, 256, 256, 256, 512, 256, 128, 128, 128, 128, 128, 128, 128, 128],
+                    # Train/eval batch sizes (next two rows): the first 7 tasks keep fixed sizes, the
+                    # last 8 use BS; both rows were previously [ 32,  32,  32,  32,  32,  16,  32] + [32] * 8.
+                    [ 32,  32,  32,  32,  32,  16,  32] + [BS] * 8,
+                    [ 32,  32,  32,  32,  32,  16,  32] + [BS] * 8,
+                    [0] *7 +[0] *8,
+                    [200, 100, 50, 100, 200, 200, 100, 200, 100, 200, 200, 100, 200, 200, 100],
+                    [200, 100, 50, 100, 200, 200, 100, 200, 100, 200, 200, 100, 200, 200, 100],
+                ),
+                "do_train": True,
+                "do_eval": True,
+                "do_test": True,
+                
+                "model_name_or_path": "/home/hushengding/plm_cache/t5-3b",
+                "tokenizer_name": "/home/hushengding/plm_cache/t5-3b",
+                "save_total_limit": 1,
+                # For glue datasets.
+                "split_validation_test": True,
+                "seed": 42,
+                "dataset_config_name": ["en"],
+                "eval_dataset_config_name": ["en"],
+                "test_dataset_config_name": ["en"],
+                # other configurations.
+                "predict_with_generate": True,
+                # To evaluate during training.
+                "load_best_model_at_end": True,
+                "metric_for_best_model": "average_metrics",
+                "greater_is_better": True,
+                "evaluation_strategy": "steps",
+                "overwrite_output_dir": True,
+                "push_to_hub": True,
+                "save_strategy": "steps"
+            }
+
+AllConfigs['bitfit_t5-base'] = copy.deepcopy(BaseConfigs['t5-base'])
+AllConfigs['bitfit_t5-base'].update({
+                "delta_type": "bitfit",      
+                "learning_rate": 3e-4,         
+                "output_dir": "outputs/bitfit/t5-base/",
+            })
+
+
+
+AllConfigs['adapter_t5-base'] = copy.deepcopy(BaseConfigs['t5-base'])
+AllConfigs['adapter_t5-base'].update({
+                                "delta_type": "adapter",
+                                "learning_rate": 3e-4,
+                                "unfrozen_modules": [
+                                    "deltas",
+                                    "layer_norm",
+                                    "final_layer_norm"
+                                ],
+                                "bottleneck_dim":24,
+                                "output_dir": "outputs/adapter/t5-base/",
+                            })
+
+AllConfigs['lora_t5-base'] = copy.deepcopy(BaseConfigs['t5-base'])
+AllConfigs['lora_t5-base'].update({
+                                "delta_type": "lora",
+                                "learning_rate": 3e-4,
+                                "unfrozen_modules": [
+                                    "deltas",
+                                    "layer_norm",
+                                    "final_layer_norm"
+                                ],
+                                "lora_r": 8,
+                                "output_dir": "outputs/lora/t5-base/",
+                            })
+
+AllConfigs['compacter_t5-base'] = copy.deepcopy(BaseConfigs['t5-base'])
+AllConfigs['compacter_t5-base'].update({
+                                "delta_type": "compacter",
+                                "learning_rate": 3e-3,
+                                "unfrozen_modules": [
+                                    "deltas",
+                                    "layer_norm",
+                                    "final_layer_norm"
+                                ],
+                                "output_dir": "outputs/compacter/t5-base/",
+                                "non_linearity": "gelu_new",
+
+                                #Compacter.
+                                "hypercomplex_division": 4, 
+                                "hypercomplex_adapters": True,
+                                "hypercomplex_nonlinearity": "glorot-uniform",
+                                # gradient clip and clamp 
+                                "gradient_clip": False,
+                                "phm_clamp": False,
+                                "normalize_phm_weight": False, 
+                                "learn_phm": True,
+                                # shared one side 
+                                "factorized_phm": True, 
+                                "shared_phm_rule": False,
+                                "factorized_phm_rule": False,
+                                "phm_c_init": "normal",
+                                "phm_init_range": 0.0001,
+                                "use_bias_down_sampler": True,
+                                "use_bias_up_sampler": True,
+                            })
+
+AllConfigs['compacter++_t5-base'] = copy.deepcopy(BaseConfigs['t5-base'])
+AllConfigs['compacter++_t5-base'].update({
+                                "delta_type": "compacter",
+                                "learning_rate": 3e-3,
+                                "do_train": True,
+                                "do_eval": True,
+                                "do_test": True,
+                                "modified_modules": [
+                                    "DenseReluDense"
+                                ],
+                                "unfrozen_modules": [
+                                    "deltas",
+                                    "layer_norm",
+                                    "final_layer_norm"
+                                ],
+                                "output_dir": "outputs/compacter++/t5-base/",
+                                "non_linearity": "gelu_new",
+
+                                #Compacter.
+                                "hypercomplex_division": 4, 
+                                "hypercomplex_adapters": True,
+                                "hypercomplex_nonlinearity": "glorot-uniform",
+                                # gradient clip and clamp 
+                                "gradient_clip": False,
+                                "phm_clamp": False,
+                                "normalize_phm_weight": False, 
+                                "learn_phm": True,
+                                # shared one side 
+                                "factorized_phm": True, 
+                                "shared_phm_rule": False,
+                                "factorized_phm_rule": False,
+                                "phm_c_init": "normal",
+                                "phm_init_range": 0.0001,
+                                "use_bias_down_sampler": True,
+                                "use_bias_up_sampler": True,
+                            })
+
+
+AllConfigs['low_rank_adapter_t5-base'] = copy.deepcopy(BaseConfigs['t5-base'])
+AllConfigs['low_rank_adapter_t5-base'].update({
+                                "delta_type": "low_rank_adapter",
+                                "learning_rate": 3e-4,
+                                "unfrozen_modules": [
+                                    "deltas",
+                                    "layer_norm",
+                                    "final_layer_norm"
+                                ],
+                                "output_dir": "outputs/low_rank_adapter/t5-base/",
+                                "non_linearity": "gelu_new",
+                                "low_rank_w_init": "glorot-uniform", 
+                                "low_rank_rank": 1,
+                            })
+
+
+AllConfigs['soft_prompt_t5-base'] = copy.deepcopy(BaseConfigs['t5-base'])
+AllConfigs['soft_prompt_t5-base'].update({
+                                "delta_type": "soft_prompt",
+                                "learning_rate": 3e-2,
+                                "soft_token_num":100,
+                                "token_init": False,
+                                "unfrozen_modules": [
+                                    "deltas",
+                                ],
+                                "output_dir": "outputs/soft_prompt/t5-base/",
+                            })
+
+AllConfigs['prefix_t5-base'] = copy.deepcopy(BaseConfigs['t5-base'])
+AllConfigs['prefix_t5-base'].update({
+                                "delta_type": "prefix",
+                                "learning_rate": 3e-4,
+                                "unfrozen_modules": [
+                                    "deltas",
+                                ],
+                                "output_dir": "outputs/prefix/t5-base/",
+                            })
+
+AllConfigs['none_t5-base'] = copy.deepcopy(BaseConfigs['t5-base'])
+AllConfigs['none_t5-base'].update({
+                                "delta_type": "none",
+                                "learning_rate": 3e-5,
+                                "output_dir": "outputs/none/t5-base/",
+                            })
+
+AllConfigs['bitfit_t5-large'] = copy.deepcopy(BaseConfigs['t5-large'])
+AllConfigs['bitfit_t5-large'].update({
+                "delta_type": "bitfit",      
+                "learning_rate": 3e-4,         
+                "output_dir": "outputs/bitfit/t5-large/",
+            })
+
+AllConfigs['none_t5-large'] = copy.deepcopy(BaseConfigs['t5-large'])
+AllConfigs['none_t5-large'].update({
+                                "delta_type": "none",
+                                "learning_rate": 3e-5,
+                                "output_dir": "outputs/none/t5-large/",
+                            })
+
+
+AllConfigs['bitfit_t5-3b'] = copy.deepcopy(BaseConfigs['t5-3b'])
+AllConfigs['bitfit_t5-3b'].update({
+                "delta_type": "bitfit",      
+                "learning_rate": 3e-4,         
+                "output_dir": "outputs/bitfit/t5-3b/",
+            })
+
+AllConfigs['none_t5-3b'] = copy.deepcopy(BaseConfigs['t5-3b'])
+AllConfigs['none_t5-3b'].update({
+                                "delta_type": "none",
+                                "learning_rate": 3e-5,
+                                "output_dir": "outputs/none/t5-3b/",
+                            })
+
+AllConfigs['adapter_t5-3b'] = copy.deepcopy(BaseConfigs['t5-3b'])
+AllConfigs['adapter_t5-3b'].update({
+                                "delta_type": "adapter",
+                                "learning_rate": 3e-4,
+                                "unfrozen_modules": [
+                                    "deltas",
+                                    "layer_norm",
+                                    "final_layer_norm"
+                                ],
+                                "bottleneck_dim":24,
+                                "output_dir": "outputs/adapter/t5-3b/",
+                            })
+
+AllConfigs['adapter_t5-large'] = copy.deepcopy(BaseConfigs['t5-large'])
+AllConfigs['adapter_t5-large'].update({
+                                "delta_type": "adapter",
+                                "learning_rate": 3e-4,
+                                "unfrozen_modules": [
+                                    "deltas",
+                                    "layer_norm",
+                                    "final_layer_norm"
+                                ],
+                                "bottleneck_dim":24,
+                                "output_dir": "outputs/adapter/t5-large/",
+                            })
+
+AllConfigs['lora_t5-large'] = copy.deepcopy(BaseConfigs['t5-large'])
+AllConfigs['lora_t5-large'].update({
+                                "delta_type": "lora",
+                                "learning_rate": 3e-4,
+                                "unfrozen_modules": [
+                                    "deltas",
+                                    "layer_norm",
+                                    "final_layer_norm"
+                                ],
+                                "lora_r": 8,
+                                "output_dir": "outputs/lora/t5-large/",
+                            })
+
+AllConfigs['lora_t5-3b'] = copy.deepcopy(BaseConfigs['t5-3b'])
+AllConfigs['lora_t5-3b'].update({
+                                "delta_type": "lora",
+                                "learning_rate": 3e-4,
+                                "unfrozen_modules": [
+                                    "deltas",
+                                    "layer_norm",
+                                    "final_layer_norm"
+                                ],
+                                "lora_r": 8,
+                                "output_dir": "outputs/lora/t5-3b/",
+                            })
+
+                            
+if __name__ == "__main__":
+    # Expand one entry of AllConfigs into one JSON config file per task.
+    import argparse
+    import json
+    import os
+    parser = argparse.ArgumentParser("Parser to generate configuration")
+    # Reject a missing/unknown job up front instead of a bare KeyError.
+    parser.add_argument("--job", type=str, required=True, choices=sorted(AllConfigs))
+    args = parser.parse_args()
+
+    config = AllConfigs[args.job]
+    out_dir = f"./{args.job}_{BS}/"
+
+    # Tuple keys name per-task fields; their value yields one row per task,
+    # and the first element of each row is the job (task) name.
+    all_config_jsons = {}
+    for key_tuple in [key for key in config if isinstance(key, tuple)]:
+        for zipped in config[key_tuple]:
+            job_name = zipped[0]
+            all_config_jsons[job_name] = {
+                key_name: zipped_elem
+                for key_name, zipped_elem in zip(key_tuple, zipped)
+                if key_name != 'job_name'
+            }
+
+    # Plain keys are shared by every task; output_dir gets a per-task suffix.
+    for key, value in config.items():
+        if isinstance(key, tuple):
+            continue
+        for job_name, job_config in all_config_jsons.items():
+            job_config[key] = value + job_name if key == "output_dir" else value
+
+    # makedirs(exist_ok=True) tolerates an already-existing output directory.
+    os.makedirs(out_dir, exist_ok=True)
+    for job_name, job_config in all_config_jsons.items():
+        with open(f"{out_dir}{job_name}.json", 'w') as fout:
+            json.dump(job_config, fout, indent=4, sort_keys=True)
+        
+    
+
+    
\ No newline at end of file
diff --git a/examples/examples_seq2seq/configs/config_gen_bs1.py b/examples/examples_seq2seq/configs/config_gen_bs1.py
new file mode 100644
index 0000000..4cf3c8e
--- /dev/null
+++ b/examples/examples_seq2seq/configs/config_gen_bs1.py
@@ -0,0 +1,411 @@
+import collections 
+import copy
+
+BS = 1
+AllConfigs = {}
+
+BaseConfigs = {}
+BaseConfigs['t5-base'] = {
+                ("job_name", "task_name", "eval_dataset_name", "test_dataset_name", "num_train_epochs", 
+                "max_source_length",
+                "per_device_train_batch_size", "per_device_eval_batch_size", "warmup_steps","save_steps", "eval_steps"): zip(
+                    ["superglue-boolq", "superglue-cb", "superglue-copa", "superglue-wic", "superglue-multirc", "superglue-record",
+                    "superglue-wsc.fixed", "mrpc", "cola", "sst2", "qnli", "rte",  "mnli", "qqp", "stsb"], 
+                    ["superglue-boolq", "superglue-cb", "superglue-copa", "superglue-wic", "superglue-multirc", "superglue-record", "superglue-wsc.fixed", "mrpc", "cola", "sst2", "qnli", "rte",  "mnli", "qqp", "stsb"], 
+                    ["superglue-boolq", "superglue-cb", "superglue-copa", "superglue-wic", "superglue-multirc", "superglue-record", "superglue-wsc.fixed", "mrpc", "cola", "sst2", "qnli", "rte",  "mnli", "qqp", "stsb"], 
+                    ["superglue-boolq", "superglue-cb", "superglue-copa", "superglue-wic", "superglue-multirc", "superglue-record", "superglue-wsc.fixed", "mrpc", "cola", "sst2", "qnli", "rte", "mnli", "qqp", "stsb"],
+                    [ 20,  20,  40,  20,   3,   3,  20,  20,  20,   3,   3,  20,   3,   3,  20],
+                    [256, 256, 256, 256, 256, 512, 256, 128, 128, 128, 128, 128, 128, 128, 128],
+                    # Train/eval batch sizes (next two rows): the first 7 tasks keep fixed sizes, the
+                    # last 8 use BS; both rows were previously [ 32,  32,  32,  32,  32,  16,  32] + [32] * 8.
+                    [ 32,  32,  32,  32,  32,  16,  32] + [BS] * 8,
+                    [ 32,  32,  32,  32,  32,  16,  32] + [BS] * 8,
+                    [0] *7 +[0] *8,
+                    [200, 100, 50, 100, 200, 200, 100, 200, 100, 200, 200, 100, 200, 200, 100],
+                    [200, 100, 50, 100, 200, 200, 100, 200, 100, 200, 200, 100, 200, 200, 100],
+                ),
+                "do_train": True,
+                "do_eval": True,
+                "do_test": True,
+                
+                "model_name_or_path": "t5-base",
+                "tokenizer_name": "t5-base",
+                "save_total_limit": 1,
+                # For glue datasets.
+                "split_validation_test": True,
+                "seed": 42,
+                "dataset_config_name": ["en"],
+                "eval_dataset_config_name": ["en"],
+                "test_dataset_config_name": ["en"],
+                # other configurations.
+                "predict_with_generate": True,
+                # To evaluate during training.
+                "load_best_model_at_end": True,
+                "metric_for_best_model": "average_metrics",
+                "greater_is_better": True,
+                "evaluation_strategy": "steps",
+                "overwrite_output_dir": True,
+                "push_to_hub": True,
+                "save_strategy": "steps"
+            }
+
+BaseConfigs['t5-large'] = {
+                ("job_name", "task_name", "eval_dataset_name", "test_dataset_name", "num_train_epochs", 
+                "max_source_length",
+                "per_device_train_batch_size", "per_device_eval_batch_size", "warmup_steps","save_steps", "eval_steps"): zip(
+                    ["superglue-boolq", "superglue-cb", "superglue-copa", "superglue-wic", "superglue-multirc", "superglue-record",
+                    "superglue-wsc.fixed", "mrpc", "cola", "sst2", "qnli", "rte",  "mnli", "qqp", "stsb"], 
+                    ["superglue-boolq", "superglue-cb", "superglue-copa", "superglue-wic", "superglue-multirc", "superglue-record", "superglue-wsc.fixed", "mrpc", "cola", "sst2", "qnli", "rte",  "mnli", "qqp", "stsb"], 
+                    ["superglue-boolq", "superglue-cb", "superglue-copa", "superglue-wic", "superglue-multirc", "superglue-record", "superglue-wsc.fixed", "mrpc", "cola", "sst2", "qnli", "rte",  "mnli", "qqp", "stsb"], 
+                    ["superglue-boolq", "superglue-cb", "superglue-copa", "superglue-wic", "superglue-multirc", "superglue-record", "superglue-wsc.fixed", "mrpc", "cola", "sst2", "qnli", "rte", "mnli", "qqp", "stsb"],
+                    [ 20,  20,  40,  20,   3,   3,  20,  20,  20,   3,   3,  20,   3,   3,  20],
+                    [256, 256, 256, 256, 256, 512, 256, 128, 128, 128, 128, 128, 128, 128, 128],
+                    # Train/eval batch sizes (next two rows): the first 7 tasks keep fixed sizes, the
+                    # last 8 use BS; both rows were previously [ 32,  32,  32,  32,  32,  16,  32] + [32] * 8.
+                    [ 32,  32,  32,  32,  32,  16,  32] + [BS] * 8,
+                    [ 32,  32,  32,  32,  32,  16,  32] + [BS] * 8,
+                    [0] *7 +[0] *8,
+                    [200, 100, 50, 100, 200, 200, 100, 200, 100, 200, 200, 100, 200, 200, 100],
+                    [200, 100, 50, 100, 200, 200, 100, 200, 100, 200, 200, 100, 200, 200, 100],
+                ),
+                "do_train": True,
+                "do_eval": True,
+                "do_test": True,
+                
+                "model_name_or_path": "/home/hushengding/plm_cache/t5-large",
+                "tokenizer_name": "/home/hushengding/plm_cache/t5-large",
+                "save_total_limit": 1,
+                # For glue datasets.
+                "split_validation_test": True,
+                "seed": 42,
+                "dataset_config_name": ["en"],
+                "eval_dataset_config_name": ["en"],
+                "test_dataset_config_name": ["en"],
+                # other configurations.
+                "predict_with_generate": True,
+                # To evaluate during training.
+                "load_best_model_at_end": True,
+                "metric_for_best_model": "average_metrics",
+                "greater_is_better": True,
+                "evaluation_strategy": "steps",
+                "overwrite_output_dir": True,
+                "push_to_hub": True,
+                "save_strategy": "steps"
+            }
+
+BaseConfigs['t5-3b'] = {
+                ("job_name", "task_name", "eval_dataset_name", "test_dataset_name", "num_train_epochs", 
+                "max_source_length",
+                "per_device_train_batch_size", "per_device_eval_batch_size", "warmup_steps","save_steps", "eval_steps"): zip(
+                    ["superglue-boolq", "superglue-cb", "superglue-copa", "superglue-wic", "superglue-multirc", "superglue-record",
+                    "superglue-wsc.fixed", "mrpc", "cola", "sst2", "qnli", "rte",  "mnli", "qqp", "stsb"], 
+                    ["superglue-boolq", "superglue-cb", "superglue-copa", "superglue-wic", "superglue-multirc", "superglue-record", "superglue-wsc.fixed", "mrpc", "cola", "sst2", "qnli", "rte",  "mnli", "qqp", "stsb"], 
+                    ["superglue-boolq", "superglue-cb", "superglue-copa", "superglue-wic", "superglue-multirc", "superglue-record", "superglue-wsc.fixed", "mrpc", "cola", "sst2", "qnli", "rte",  "mnli", "qqp", "stsb"], 
+                    ["superglue-boolq", "superglue-cb", "superglue-copa", "superglue-wic", "superglue-multirc", "superglue-record", "superglue-wsc.fixed", "mrpc", "cola", "sst2", "qnli", "rte", "mnli", "qqp", "stsb"],
+                    [ 20,  20,  40,  20,   3,   3,  20,  20,  20,   3,   3,  20,   3,   3,  20],
+                    [256, 256, 256, 256, 256, 512, 256, 128, 128, 128, 128, 128, 128, 128, 128],
+                    # Train/eval batch sizes (next two rows): the first 7 tasks keep fixed sizes, the
+                    # last 8 use BS; both rows were previously [ 32,  32,  32,  32,  32,  16,  32] + [32] * 8.
+                    [ 32,  32,  32,  32,  32,  16,  32] + [BS] * 8,
+                    [ 32,  32,  32,  32,  32,  16,  32] + [BS] * 8,
+                    [0] *7 +[0] *8,
+                    [200, 100, 50, 100, 200, 200, 100, 200, 100, 200, 200, 100, 200, 200, 100],
+                    [200, 100, 50, 100, 200, 200, 100, 200, 100, 200, 200, 100, 200, 200, 100],
+                ),
+                "do_train": True,
+                "do_eval": True,
+                "do_test": True,
+                
+                "model_name_or_path": "/home/hushengding/plm_cache/t5-3b",
+                "tokenizer_name": "/home/hushengding/plm_cache/t5-3b",
+                "save_total_limit": 1,
+                # For glue datasets.
+                "split_validation_test": True,
+                "seed": 42,
+                "dataset_config_name": ["en"],
+                "eval_dataset_config_name": ["en"],
+                "test_dataset_config_name": ["en"],
+                # other configurations.
+                "predict_with_generate": True,
+                # To evaluate during training.
+                "load_best_model_at_end": True,
+                "metric_for_best_model": "average_metrics",
+                "greater_is_better": True,
+                "evaluation_strategy": "steps",
+                "overwrite_output_dir": True,
+                "push_to_hub": True,
+                "save_strategy": "steps"
+            }
+
+AllConfigs['bitfit_t5-base'] = copy.deepcopy(BaseConfigs['t5-base'])  # deep copy, so the update below stays local to this entry
+AllConfigs['bitfit_t5-base'].update({
+                "delta_type": "bitfit",      
+                "learning_rate": 3e-4,         
+                "output_dir": "outputs/bitfit/t5-base/",  # prefix only: the __main__ section appends each job name
+            })
+
+
+
+AllConfigs['adapter_t5-base'] = copy.deepcopy(BaseConfigs['t5-base'])
+AllConfigs['adapter_t5-base'].update({
+                                "delta_type": "adapter",
+                                "learning_rate": 3e-4,
+                                "unfrozen_modules": [
+                                    "deltas",
+                                    "layer_norm",
+                                    "final_layer_norm"
+                                ],
+                                "bottleneck_dim":24,
+                                "output_dir": "outputs/adapter/t5-base/",
+                            })
+
+AllConfigs['lora_t5-base'] = copy.deepcopy(BaseConfigs['t5-base'])
+AllConfigs['lora_t5-base'].update({
+                                "delta_type": "lora",
+                                "learning_rate": 3e-4,
+                                "unfrozen_modules": [
+                                    "deltas",
+                                    "layer_norm",
+                                    "final_layer_norm"
+                                ],
+                                "lora_r": 8,
+                                "output_dir": "outputs/lora/t5-base/",
+                            })
+
+AllConfigs['compacter_t5-base'] = copy.deepcopy(BaseConfigs['t5-base'])
+AllConfigs['compacter_t5-base'].update({
+                                "delta_type": "compacter",
+                                "learning_rate": 3e-3,
+                                "unfrozen_modules": [
+                                    "deltas",
+                                    "layer_norm",
+                                    "final_layer_norm"
+                                ],
+                                "output_dir": "outputs/compacter/t5-base/",
+                                "non_linearity": "gelu_new",
+
+                                #Compacter.
+                                "hypercomplex_division": 4, 
+                                "hypercomplex_adapters": True,
+                                "hypercomplex_nonlinearity": "glorot-uniform",
+                                # gradient clip and clamp 
+                                "gradient_clip": False,
+                                "phm_clamp": False,
+                                "normalize_phm_weight": False, 
+                                "learn_phm": True,
+                                # shared one side 
+                                "factorized_phm": True, 
+                                "shared_phm_rule": False,
+                                "factorized_phm_rule": False,
+                                "phm_c_init": "normal",
+                                "phm_init_range": 0.0001,
+                                "use_bias_down_sampler": True,
+                                "use_bias_up_sampler": True,
+                            })
+
+AllConfigs['compacter++_t5-base'] = copy.deepcopy(BaseConfigs['t5-base'])
+AllConfigs['compacter++_t5-base'].update({
+                                "delta_type": "compacter",  # same delta_type as the compacter entry above
+                                "learning_rate": 3e-3,
+                                "do_train": True,
+                                "do_eval": True,
+                                "do_test": True,
+                                "modified_modules": [  # key that the compacter entry above does not set
+                                    "DenseReluDense"
+                                ],
+                                "unfrozen_modules": [
+                                    "deltas",
+                                    "layer_norm",
+                                    "final_layer_norm"
+                                ],
+                                "output_dir": "outputs/compacter++/t5-base/",
+                                "non_linearity": "gelu_new",
+
+                                #Compacter.
+                                "hypercomplex_division": 4, 
+                                "hypercomplex_adapters": True,
+                                "hypercomplex_nonlinearity": "glorot-uniform",
+                                # gradient clip and clamp 
+                                "gradient_clip": False,
+                                "phm_clamp": False,
+                                "normalize_phm_weight": False, 
+                                "learn_phm": True,
+                                # shared one side 
+                                "factorized_phm": True, 
+                                "shared_phm_rule": False,
+                                "factorized_phm_rule": False,
+                                "phm_c_init": "normal",
+                                "phm_init_range": 0.0001,
+                                "use_bias_down_sampler": True,
+                                "use_bias_up_sampler": True,
+                            })
+
+
+AllConfigs['low_rank_adapter_t5-base'] = copy.deepcopy(BaseConfigs['t5-base'])
+AllConfigs['low_rank_adapter_t5-base'].update({
+                                "delta_type": "low_rank_adapter",
+                                "learning_rate": 3e-4,
+                                "unfrozen_modules": [
+                                    "deltas",
+                                    "layer_norm",
+                                    "final_layer_norm"
+                                ],
+                                "output_dir": "outputs/low_rank_adapter/t5-base/",
+                                "non_linearity": "gelu_new",
+                                "low_rank_w_init": "glorot-uniform", 
+                                "low_rank_rank": 1,
+                            })
+
+
+AllConfigs['soft_prompt_t5-base'] = copy.deepcopy(BaseConfigs['t5-base'])
+AllConfigs['soft_prompt_t5-base'].update({
+                                "delta_type": "soft_prompt",
+                                "learning_rate": 3e-2,  # largest learning rate among the t5-base entries
+                                "soft_token_num":100,
+                                "token_init": False,
+                                "unfrozen_modules": [
+                                    "deltas",
+                                ],
+                                "output_dir": "outputs/soft_prompt/t5-base/",
+                            })
+
+AllConfigs['prefix_t5-base'] = copy.deepcopy(BaseConfigs['t5-base'])
+AllConfigs['prefix_t5-base'].update({
+                                "delta_type": "prefix",
+                                "learning_rate": 3e-4,
+                                "unfrozen_modules": [
+                                    "deltas",
+                                ],
+                                "output_dir": "outputs/prefix/t5-base/",
+                            })
+
+AllConfigs['none_t5-base'] = copy.deepcopy(BaseConfigs['t5-base'])
+AllConfigs['none_t5-base'].update({
+                                "delta_type": "none",  # NOTE(review): presumably no delta module (plain fine-tuning) - confirm
+                                "learning_rate": 3e-5,
+                                "output_dir": "outputs/none/t5-base/",
+                            })
+
+AllConfigs['bitfit_t5-large'] = copy.deepcopy(BaseConfigs['t5-large'])  # deep copy, so the update below stays local to this entry
+AllConfigs['bitfit_t5-large'].update({
+                "delta_type": "bitfit",      
+                "learning_rate": 3e-4,         
+                "output_dir": "outputs/bitfit/t5-large/",  # prefix only: the __main__ section appends each job name
+            })
+
+AllConfigs['none_t5-large'] = copy.deepcopy(BaseConfigs['t5-large'])
+AllConfigs['none_t5-large'].update({
+                                "delta_type": "none",  # NOTE(review): presumably no delta module (plain fine-tuning) - confirm
+                                "learning_rate": 3e-5,
+                                "output_dir": "outputs/none/t5-large/",
+                            })
+
+
+AllConfigs['bitfit_t5-3b'] = copy.deepcopy(BaseConfigs['t5-3b'])
+AllConfigs['bitfit_t5-3b'].update({
+                "delta_type": "bitfit",      
+                "learning_rate": 3e-4,         
+                "output_dir": "outputs/bitfit/t5-3b/",
+            })
+
+AllConfigs['none_t5-3b'] = copy.deepcopy(BaseConfigs['t5-3b'])
+AllConfigs['none_t5-3b'].update({
+                                "delta_type": "none",
+                                "learning_rate": 3e-5,
+                                "output_dir": "outputs/none/t5-3b/",
+                            })
+
+AllConfigs['adapter_t5-3b'] = copy.deepcopy(BaseConfigs['t5-3b'])
+AllConfigs['adapter_t5-3b'].update({
+                                "delta_type": "adapter",
+                                "learning_rate": 3e-4,
+                                "unfrozen_modules": [
+                                    "deltas",
+                                    "layer_norm",
+                                    "final_layer_norm"
+                                ],
+                                "bottleneck_dim":24,  # same value as the t5-base adapter entry
+                                "output_dir": "outputs/adapter/t5-3b/",
+                            })
+
+AllConfigs['adapter_t5-large'] = copy.deepcopy(BaseConfigs['t5-large'])
+AllConfigs['adapter_t5-large'].update({
+                                "delta_type": "adapter",
+                                "learning_rate": 3e-4,
+                                "unfrozen_modules": [
+                                    "deltas",
+                                    "layer_norm",
+                                    "final_layer_norm"
+                                ],
+                                "bottleneck_dim":24,
+                                "output_dir": "outputs/adapter/t5-large/",
+                            })
+
+AllConfigs['lora_t5-large'] = copy.deepcopy(BaseConfigs['t5-large'])
+AllConfigs['lora_t5-large'].update({
+                                "delta_type": "lora",
+                                "learning_rate": 3e-4,
+                                "unfrozen_modules": [
+                                    "deltas",
+                                    "layer_norm",
+                                    "final_layer_norm"
+                                ],
+                                "lora_r": 8,  # same value as the t5-base lora entry
+                                "output_dir": "outputs/lora/t5-large/",
+                            })
+
+AllConfigs['lora_t5-3b'] = copy.deepcopy(BaseConfigs['t5-3b'])
+AllConfigs['lora_t5-3b'].update({
+                                "delta_type": "lora",
+                                "learning_rate": 3e-4,
+                                "unfrozen_modules": [
+                                    "deltas",
+                                    "layer_norm",
+                                    "final_layer_norm"
+                                ],
+                                "lora_r": 8,
+                                "output_dir": "outputs/lora/t5-3b/",
+                            })
+
+                            
+if __name__ == "__main__":
+    # Expand the job selected with --job into one JSON config file per task.
+    import argparse
+    import json
+    import os
+    parser = argparse.ArgumentParser("Parser to generate configuration")
+    # --job is mandatory and must name an AllConfigs entry; anything else is rejected up front.
+    parser.add_argument("--job", type=str, required=True, choices=sorted(AllConfigs))
+    args = parser.parse_args()
+
+    config = AllConfigs[args.job]
+
+    # Tuple keys map to per-task value rows; string keys are shared by every task.
+    all_config_jsons = {}
+    for key_tuple in [key for key in config if isinstance(key, tuple)]:
+        for zipped in config[key_tuple]:
+            job_name = zipped[0]  # the first column of each row is the job name
+            # setdefault keeps fields written by an earlier tuple key instead of resetting them
+            task_config = all_config_jsons.setdefault(job_name, {})
+            for key_name, zipped_elem in zip(key_tuple, zipped):
+                if key_name != 'job_name':
+                    task_config[key_name] = zipped_elem
+    for key in config:
+        if isinstance(key, tuple):
+            continue
+        for job_name in all_config_jsons:
+            if key == "output_dir":
+                # every task gets its own sub-directory under the shared output_dir prefix
+                all_config_jsons[job_name][key] = config[key] + job_name
+            else:
+                all_config_jsons[job_name][key] = config[key]
+
+    # exist_ok avoids the check-then-create race of os.path.exists + os.mkdir
+    os.makedirs(f"./{args.job}_{BS}/", exist_ok=True)
+    for job_name in all_config_jsons:
+        with open(f"./{args.job}_{BS}/{job_name}.json", 'w') as fout:
+            json.dump(all_config_jsons[job_name], fout, indent=4,sort_keys=True)
+        
+    
+
+    
\ No newline at end of file
diff --git a/examples/examples_seq2seq/configs/config_gen_bs64.py b/examples/examples_seq2seq/configs/config_gen_bs64.py
new file mode 100644
index 0000000..90426fe
--- /dev/null
+++ b/examples/examples_seq2seq/configs/config_gen_bs64.py
@@ -0,0 +1,411 @@
+import collections 
+import copy
+
+BS = 64  # batch size used for the last eight tasks below; also part of the generated folder name
+AllConfigs = {}
+
+BaseConfigs = {}
+BaseConfigs['t5-base'] = {
+                ("job_name", "task_name", "eval_dataset_name", "test_dataset_name", "num_train_epochs", 
+                "max_source_length",
+                "per_device_train_batch_size", "per_device_eval_batch_size", "warmup_steps","save_steps", "eval_steps"): list(zip(
+                    ["superglue-boolq", "superglue-cb", "superglue-copa", "superglue-wic", "superglue-multirc", "superglue-record",
+                    "superglue-wsc.fixed", "mrpc", "cola", "sst2", "qnli", "rte",  "mnli", "qqp", "stsb"], 
+                    ["superglue-boolq", "superglue-cb", "superglue-copa", "superglue-wic", "superglue-multirc", "superglue-record", "superglue-wsc.fixed", "mrpc", "cola", "sst2", "qnli", "rte",  "mnli", "qqp", "stsb"], 
+                    ["superglue-boolq", "superglue-cb", "superglue-copa", "superglue-wic", "superglue-multirc", "superglue-record", "superglue-wsc.fixed", "mrpc", "cola", "sst2", "qnli", "rte",  "mnli", "qqp", "stsb"], 
+                    ["superglue-boolq", "superglue-cb", "superglue-copa", "superglue-wic", "superglue-multirc", "superglue-record", "superglue-wsc.fixed", "mrpc", "cola", "sst2", "qnli", "rte", "mnli", "qqp", "stsb"],
+                    [ 20,  20,  40,  20,   3,   3,  20,  20,  20,   3,   3,  20,   3,   3,  20],
+                    [256, 256, 256, 256, 256, 512, 256, 128, 128, 128, 128, 128, 128, 128, 128],
+                    # Rows are materialised with list(): a bare zip is a one-shot iterator, so they
+                    # could be read only once and copy.deepcopy had to copy a live iterator.
+                    [ 32,  32,  32,  32,  32,  16,  32] + [BS] * 8,
+                    [ 32,  32,  32,  32,  32,  16,  32] + [BS] * 8,
+                    [0] *7 +[0] *8,
+                    [200, 100, 50, 100, 200, 200, 100, 200, 100, 200, 200, 100, 200, 200, 100],
+                    [200, 100, 50, 100, 200, 200, 100, 200, 100, 200, 200, 100, 200, 200, 100],
+                )),
+                "do_train": True,
+                "do_eval": True,
+                "do_test": True,
+                # String keys from here on are copied unchanged into every generated task config.
+                "model_name_or_path": "t5-base",
+                "tokenizer_name": "t5-base",
+                "save_total_limit": 1,
+                # For glue datasets.
+                "split_validation_test": True,
+                "seed": 42,
+                "dataset_config_name": ["en"],
+                "eval_dataset_config_name": ["en"],
+                "test_dataset_config_name": ["en"],
+                # other configurations.
+                "predict_with_generate": True,
+                # To evaluate during training.
+                "load_best_model_at_end": True,
+                "metric_for_best_model": "average_metrics",
+                "greater_is_better": True,
+                "evaluation_strategy": "steps",
+                "overwrite_output_dir": True,
+                "push_to_hub": True,  # NOTE(review): confirm that pushing every run to the hub is intended
+                "save_strategy": "steps"
+            }
+
+BaseConfigs['t5-large'] = {
+                ("job_name", "task_name", "eval_dataset_name", "test_dataset_name", "num_train_epochs", 
+                "max_source_length",
+                "per_device_train_batch_size", "per_device_eval_batch_size", "warmup_steps","save_steps", "eval_steps"): list(zip(
+                    ["superglue-boolq", "superglue-cb", "superglue-copa", "superglue-wic", "superglue-multirc", "superglue-record",
+                    "superglue-wsc.fixed", "mrpc", "cola", "sst2", "qnli", "rte",  "mnli", "qqp", "stsb"], 
+                    ["superglue-boolq", "superglue-cb", "superglue-copa", "superglue-wic", "superglue-multirc", "superglue-record", "superglue-wsc.fixed", "mrpc", "cola", "sst2", "qnli", "rte",  "mnli", "qqp", "stsb"], 
+                    ["superglue-boolq", "superglue-cb", "superglue-copa", "superglue-wic", "superglue-multirc", "superglue-record", "superglue-wsc.fixed", "mrpc", "cola", "sst2", "qnli", "rte",  "mnli", "qqp", "stsb"], 
+                    ["superglue-boolq", "superglue-cb", "superglue-copa", "superglue-wic", "superglue-multirc", "superglue-record", "superglue-wsc.fixed", "mrpc", "cola", "sst2", "qnli", "rte", "mnli", "qqp", "stsb"],
+                    [ 20,  20,  40,  20,   3,   3,  20,  20,  20,   3,   3,  20,   3,   3,  20],
+                    [256, 256, 256, 256, 256, 512, 256, 128, 128, 128, 128, 128, 128, 128, 128],
+                    # Rows are materialised with list(): a bare zip is a one-shot iterator, so they
+                    # could be read only once and copy.deepcopy had to copy a live iterator.
+                    [ 32,  32,  32,  32,  32,  16,  32] + [BS] * 8,
+                    [ 32,  32,  32,  32,  32,  16,  32] + [BS] * 8,
+                    [0] *7 +[0] *8,
+                    [200, 100, 50, 100, 200, 200, 100, 200, 100, 200, 200, 100, 200, 200, 100],
+                    [200, 100, 50, 100, 200, 200, 100, 200, 100, 200, 200, 100, 200, 200, 100],
+                )),
+                "do_train": True,
+                "do_eval": True,
+                "do_test": True,
+                # String keys from here on are copied unchanged into every generated task config.
+                "model_name_or_path": "/home/hushengding/plm_cache/t5-large",  # NOTE(review): machine-specific absolute path
+                "tokenizer_name": "/home/hushengding/plm_cache/t5-large",
+                "save_total_limit": 1,
+                # For glue datasets.
+                "split_validation_test": True,
+                "seed": 42,
+                "dataset_config_name": ["en"],
+                "eval_dataset_config_name": ["en"],
+                "test_dataset_config_name": ["en"],
+                # other configurations.
+                "predict_with_generate": True,
+                # To evaluate during training.
+                "load_best_model_at_end": True,
+                "metric_for_best_model": "average_metrics",
+                "greater_is_better": True,
+                "evaluation_strategy": "steps",
+                "overwrite_output_dir": True,
+                "push_to_hub": True,  # NOTE(review): confirm that pushing every run to the hub is intended
+                "save_strategy": "steps"
+            }
+
+BaseConfigs['t5-3b'] = {
+                ("job_name", "task_name", "eval_dataset_name", "test_dataset_name", "num_train_epochs", 
+                "max_source_length",
+                "per_device_train_batch_size", "per_device_eval_batch_size", "warmup_steps","save_steps", "eval_steps"): list(zip(
+                    ["superglue-boolq", "superglue-cb", "superglue-copa", "superglue-wic", "superglue-multirc", "superglue-record",
+                    "superglue-wsc.fixed", "mrpc", "cola", "sst2", "qnli", "rte",  "mnli", "qqp", "stsb"], 
+                    ["superglue-boolq", "superglue-cb", "superglue-copa", "superglue-wic", "superglue-multirc", "superglue-record", "superglue-wsc.fixed", "mrpc", "cola", "sst2", "qnli", "rte",  "mnli", "qqp", "stsb"], 
+                    ["superglue-boolq", "superglue-cb", "superglue-copa", "superglue-wic", "superglue-multirc", "superglue-record", "superglue-wsc.fixed", "mrpc", "cola", "sst2", "qnli", "rte",  "mnli", "qqp", "stsb"], 
+                    ["superglue-boolq", "superglue-cb", "superglue-copa", "superglue-wic", "superglue-multirc", "superglue-record", "superglue-wsc.fixed", "mrpc", "cola", "sst2", "qnli", "rte", "mnli", "qqp", "stsb"],
+                    [ 20,  20,  40,  20,   3,   3,  20,  20,  20,   3,   3,  20,   3,   3,  20],
+                    [256, 256, 256, 256, 256, 512, 256, 128, 128, 128, 128, 128, 128, 128, 128],
+                    # Rows are materialised with list(): a bare zip is a one-shot iterator, so they
+                    # could be read only once and copy.deepcopy had to copy a live iterator.
+                    [ 32,  32,  32,  32,  32,  16,  32] + [BS] * 8,
+                    [ 32,  32,  32,  32,  32,  16,  32] + [BS] * 8,
+                    [0] *7 +[0] *8,
+                    [200, 100, 50, 100, 200, 200, 100, 200, 100, 200, 200, 100, 200, 200, 100],
+                    [200, 100, 50, 100, 200, 200, 100, 200, 100, 200, 200, 100, 200, 200, 100],
+                )),
+                "do_train": True,
+                "do_eval": True,
+                "do_test": True,
+                # String keys from here on are copied unchanged into every generated task config.
+                "model_name_or_path": "/home/hushengding/plm_cache/t5-3b",  # NOTE(review): machine-specific absolute path
+                "tokenizer_name": "/home/hushengding/plm_cache/t5-3b",
+                "save_total_limit": 1,
+                # For glue datasets.
+                "split_validation_test": True,
+                "seed": 42,
+                "dataset_config_name": ["en"],
+                "eval_dataset_config_name": ["en"],
+                "test_dataset_config_name": ["en"],
+                # other configurations.
+                "predict_with_generate": True,
+                # To evaluate during training.
+                "load_best_model_at_end": True,
+                "metric_for_best_model": "average_metrics",
+                "greater_is_better": True,
+                "evaluation_strategy": "steps",
+                "overwrite_output_dir": True,
+                "push_to_hub": True,  # NOTE(review): confirm that pushing every run to the hub is intended
+                "save_strategy": "steps"
+            }
+
+AllConfigs['bitfit_t5-base'] = copy.deepcopy(BaseConfigs['t5-base'])  # deep copy, so the update below stays local to this entry
+AllConfigs['bitfit_t5-base'].update({
+                "delta_type": "bitfit",      
+                "learning_rate": 3e-4,         
+                "output_dir": "outputs/bitfit/t5-base/",  # prefix only: the __main__ section appends each job name
+            })
+
+
+
+AllConfigs['adapter_t5-base'] = copy.deepcopy(BaseConfigs['t5-base'])
+AllConfigs['adapter_t5-base'].update({
+                                "delta_type": "adapter",
+                                "learning_rate": 3e-4,
+                                "unfrozen_modules": [
+                                    "deltas",
+                                    "layer_norm",
+                                    "final_layer_norm"
+                                ],
+                                "bottleneck_dim":24,
+                                "output_dir": "outputs/adapter/t5-base/",
+                            })
+
+AllConfigs['lora_t5-base'] = copy.deepcopy(BaseConfigs['t5-base'])
+AllConfigs['lora_t5-base'].update({
+                                "delta_type": "lora",
+                                "learning_rate": 3e-4,
+                                "unfrozen_modules": [
+                                    "deltas",
+                                    "layer_norm",
+                                    "final_layer_norm"
+                                ],
+                                "lora_r": 8,
+                                "output_dir": "outputs/lora/t5-base/",
+                            })
+
+AllConfigs['compacter_t5-base'] = copy.deepcopy(BaseConfigs['t5-base'])
+AllConfigs['compacter_t5-base'].update({
+                                "delta_type": "compacter",
+                                "learning_rate": 3e-3,
+                                "unfrozen_modules": [
+                                    "deltas",
+                                    "layer_norm",
+                                    "final_layer_norm"
+                                ],
+                                "output_dir": "outputs/compacter/t5-base/",
+                                "non_linearity": "gelu_new",
+
+                                #Compacter.
+                                "hypercomplex_division": 4, 
+                                "hypercomplex_adapters": True,
+                                "hypercomplex_nonlinearity": "glorot-uniform",
+                                # gradient clip and clamp 
+                                "gradient_clip": False,
+                                "phm_clamp": False,
+                                "normalize_phm_weight": False, 
+                                "learn_phm": True,
+                                # shared one side 
+                                "factorized_phm": True, 
+                                "shared_phm_rule": False,
+                                "factorized_phm_rule": False,
+                                "phm_c_init": "normal",
+                                "phm_init_range": 0.0001,
+                                "use_bias_down_sampler": True,
+                                "use_bias_up_sampler": True,
+                            })
+
+AllConfigs['compacter++_t5-base'] = copy.deepcopy(BaseConfigs['t5-base'])
+AllConfigs['compacter++_t5-base'].update({
+                                "delta_type": "compacter",  # same delta_type as the compacter entry above
+                                "learning_rate": 3e-3,
+                                "do_train": True,  # the three do_* flags are already True in BaseConfigs['t5-base']
+                                "do_eval": True,
+                                "do_test": True,
+                                "modified_modules": [  # key that the compacter entry above does not set
+                                    "DenseReluDense"
+                                ],
+                                "unfrozen_modules": [
+                                    "deltas",
+                                    "layer_norm",
+                                    "final_layer_norm"
+                                ],
+                                "output_dir": "outputs/compacter++/t5-base/",
+                                "non_linearity": "gelu_new",
+
+                                #Compacter.
+                                "hypercomplex_division": 4, 
+                                "hypercomplex_adapters": True,
+                                "hypercomplex_nonlinearity": "glorot-uniform",
+                                # gradient clip and clamp 
+                                "gradient_clip": False,
+                                "phm_clamp": False,
+                                "normalize_phm_weight": False, 
+                                "learn_phm": True,
+                                # shared one side 
+                                "factorized_phm": True, 
+                                "shared_phm_rule": False,
+                                "factorized_phm_rule": False,
+                                "phm_c_init": "normal",
+                                "phm_init_range": 0.0001,
+                                "use_bias_down_sampler": True,
+                                "use_bias_up_sampler": True,
+                            })
+
+
+AllConfigs['low_rank_adapter_t5-base'] = copy.deepcopy(BaseConfigs['t5-base'])
+AllConfigs['low_rank_adapter_t5-base'].update({
+                                "delta_type": "low_rank_adapter",
+                                "learning_rate": 3e-4,
+                                "unfrozen_modules": [
+                                    "deltas",
+                                    "layer_norm",
+                                    "final_layer_norm"
+                                ],
+                                "output_dir": "outputs/low_rank_adapter/t5-base/",
+                                "non_linearity": "gelu_new",
+                                "low_rank_w_init": "glorot-uniform", 
+                                "low_rank_rank": 1,
+                            })
+
+
+AllConfigs['soft_prompt_t5-base'] = copy.deepcopy(BaseConfigs['t5-base'])
+AllConfigs['soft_prompt_t5-base'].update({
+                                "delta_type": "soft_prompt",
+                                "learning_rate": 3e-2,  # largest learning rate among the t5-base entries
+                                "soft_token_num":100,
+                                "token_init": False,
+                                "unfrozen_modules": [
+                                    "deltas",
+                                ],
+                                "output_dir": "outputs/soft_prompt/t5-base/",
+                            })
+
+AllConfigs['prefix_t5-base'] = copy.deepcopy(BaseConfigs['t5-base'])
+AllConfigs['prefix_t5-base'].update({
+                                "delta_type": "prefix",
+                                "learning_rate": 3e-4,
+                                "unfrozen_modules": [
+                                    "deltas",
+                                ],
+                                "output_dir": "outputs/prefix/t5-base/",
+                            })
+
+AllConfigs['none_t5-base'] = copy.deepcopy(BaseConfigs['t5-base'])
+AllConfigs['none_t5-base'].update({
+                                "delta_type": "none",  # NOTE(review): presumably no delta module (plain fine-tuning) - confirm
+                                "learning_rate": 3e-5,
+                                "output_dir": "outputs/none/t5-base/",
+                            })
+
+AllConfigs['bitfit_t5-large'] = copy.deepcopy(BaseConfigs['t5-large'])  # deep copy, so the update below stays local to this entry
+AllConfigs['bitfit_t5-large'].update({
+                "delta_type": "bitfit",      
+                "learning_rate": 3e-4,         
+                "output_dir": "outputs/bitfit/t5-large/",  # prefix only: the __main__ section appends each job name
+            })
+
+AllConfigs['none_t5-large'] = copy.deepcopy(BaseConfigs['t5-large'])
+AllConfigs['none_t5-large'].update({
+                                "delta_type": "none",  # NOTE(review): presumably no delta module (plain fine-tuning) - confirm
+                                "learning_rate": 3e-5,
+                                "output_dir": "outputs/none/t5-large/",
+                            })
+
+
+AllConfigs['bitfit_t5-3b'] = copy.deepcopy(BaseConfigs['t5-3b'])
+AllConfigs['bitfit_t5-3b'].update({
+                "delta_type": "bitfit",      
+                "learning_rate": 3e-4,         
+                "output_dir": "outputs/bitfit/t5-3b/",
+            })
+
+AllConfigs['none_t5-3b'] = copy.deepcopy(BaseConfigs['t5-3b'])
+AllConfigs['none_t5-3b'].update({
+                                "delta_type": "none",
+                                "learning_rate": 3e-5,
+                                "output_dir": "outputs/none/t5-3b/",
+                            })
+
+AllConfigs['adapter_t5-3b'] = copy.deepcopy(BaseConfigs['t5-3b'])
+AllConfigs['adapter_t5-3b'].update({
+                                "delta_type": "adapter",
+                                "learning_rate": 3e-4,
+                                "unfrozen_modules": [
+                                    "deltas",
+                                    "layer_norm",
+                                    "final_layer_norm"
+                                ],
+                                "bottleneck_dim":24,  # same value as the t5-base adapter entry
+                                "output_dir": "outputs/adapter/t5-3b/",
+                            })
+
+AllConfigs['adapter_t5-large'] = copy.deepcopy(BaseConfigs['t5-large'])
+AllConfigs['adapter_t5-large'].update({
+                                "delta_type": "adapter",
+                                "learning_rate": 3e-4,
+                                "unfrozen_modules": [
+                                    "deltas",
+                                    "layer_norm",
+                                    "final_layer_norm"
+                                ],
+                                "bottleneck_dim":24,
+                                "output_dir": "outputs/adapter/t5-large/",
+                            })
+
+AllConfigs['lora_t5-large'] = copy.deepcopy(BaseConfigs['t5-large'])
+AllConfigs['lora_t5-large'].update({
+                                "delta_type": "lora",
+                                "learning_rate": 3e-4,
+                                "unfrozen_modules": [
+                                    "deltas",
+                                    "layer_norm",
+                                    "final_layer_norm"
+                                ],
+                                "lora_r": 8,  # same value as the t5-base lora entry
+                                "output_dir": "outputs/lora/t5-large/",
+                            })
+
+AllConfigs['lora_t5-3b'] = copy.deepcopy(BaseConfigs['t5-3b'])
+AllConfigs['lora_t5-3b'].update({
+                                "delta_type": "lora",
+                                "learning_rate": 3e-4,
+                                "unfrozen_modules": [
+                                    "deltas",
+                                    "layer_norm",
+                                    "final_layer_norm"
+                                ],
+                                "lora_r": 8,
+                                "output_dir": "outputs/lora/t5-3b/",
+                            })
+
+                            
+if __name__ == "__main__":
+    # Expand the job selected with --job into one JSON config file per task.
+    import argparse
+    import json
+    import os
+    parser = argparse.ArgumentParser("Parser to generate configuration")
+    # --job is mandatory and must name an AllConfigs entry; anything else is rejected up front.
+    parser.add_argument("--job", type=str, required=True, choices=sorted(AllConfigs))
+    args = parser.parse_args()
+
+    config = AllConfigs[args.job]
+
+    # Tuple keys map to per-task value rows; string keys are shared by every task.
+    all_config_jsons = {}
+    for key_tuple in [key for key in config if isinstance(key, tuple)]:
+        for zipped in config[key_tuple]:
+            job_name = zipped[0]  # the first column of each row is the job name
+            # setdefault keeps fields written by an earlier tuple key instead of resetting them
+            task_config = all_config_jsons.setdefault(job_name, {})
+            for key_name, zipped_elem in zip(key_tuple, zipped):
+                if key_name != 'job_name':
+                    task_config[key_name] = zipped_elem
+    for key in config:
+        if isinstance(key, tuple):
+            continue
+        for job_name in all_config_jsons:
+            if key == "output_dir":
+                # every task gets its own sub-directory under the shared output_dir prefix
+                all_config_jsons[job_name][key] = config[key] + job_name
+            else:
+                all_config_jsons[job_name][key] = config[key]
+
+    # exist_ok avoids the check-then-create race of os.path.exists + os.mkdir
+    os.makedirs(f"./{args.job}_{BS}/", exist_ok=True)
+    for job_name in all_config_jsons:
+        with open(f"./{args.job}_{BS}/{job_name}.json", 'w') as fout:
+            json.dump(all_config_jsons[job_name], fout, indent=4,sort_keys=True)
+        
+    
+
+    
\ No newline at end of file
diff --git a/examples/examples_seq2seq/configs/config_gen_bs8.py b/examples/examples_seq2seq/configs/config_gen_bs8.py
new file mode 100644
index 0000000..5e48edb
--- /dev/null
+++ b/examples/examples_seq2seq/configs/config_gen_bs8.py
@@ -0,0 +1,411 @@
+import collections 
+import copy
+
+BS = 8
+AllConfigs = {}
+# BaseConfigs holds one template per backbone; AllConfigs entries are deep copies of these templates.
+BaseConfigs = {}
+BaseConfigs['t5-base'] = {
+                ("job_name", "task_name", "eval_dataset_name", "test_dataset_name", "num_train_epochs", 
+                "max_source_length",
+                "per_device_train_batch_size", "per_device_eval_batch_size", "warmup_steps","save_steps", "eval_steps"): zip(
+                    ["superglue-boolq", "superglue-cb", "superglue-copa", "superglue-wic", "superglue-multirc", "superglue-record",
+                    "superglue-wsc.fixed", "mrpc", "cola", "sst2", "qnli", "rte",  "mnli", "qqp", "stsb"], 
+                    ["superglue-boolq", "superglue-cb", "superglue-copa", "superglue-wic", "superglue-multirc", "superglue-record", "superglue-wsc.fixed", "mrpc", "cola", "sst2", "qnli", "rte",  "mnli", "qqp", "stsb"], 
+                    ["superglue-boolq", "superglue-cb", "superglue-copa", "superglue-wic", "superglue-multirc", "superglue-record", "superglue-wsc.fixed", "mrpc", "cola", "sst2", "qnli", "rte",  "mnli", "qqp", "stsb"], 
+                    ["superglue-boolq", "superglue-cb", "superglue-copa", "superglue-wic", "superglue-multirc", "superglue-record", "superglue-wsc.fixed", "mrpc", "cola", "sst2", "qnli", "rte", "mnli", "qqp", "stsb"],
+                    [ 20,  20,  40,  20,   3,   3,  20,  20,  20,   3,   3,  20,   3,   3,  20],
+                    [256, 256, 256, 256, 256, 512, 256, 128, 128, 128, 128, 128, 128, 128, 128],
+                    # Train/eval batch sizes: the first seven (superglue-*) entries are fixed values,
+                    # while the remaining eight tasks (mrpc ... stsb) use the module-level BS.
+                    [ 32,  32,  32,  32,  32,  16,  32] + [BS] * 8,
+                    [ 32,  32,  32,  32,  32,  16,  32] + [BS] * 8,
+                    [0] *7 +[0] *8,
+                    [200, 100, 50, 100, 200, 200, 100, 200, 100, 200, 200, 100, 200, 200, 100],
+                    [200, 100, 50, 100, 200, 200, 100, 200, 100, 200, 200, 100, 200, 200, 100],
+                ),
+                "do_train": True,
+                "do_eval": True,
+                "do_test": True,
+                # The tuple key above is per-task; the string keys here are plain settings of the template.
+                "model_name_or_path": "t5-base",
+                "tokenizer_name": "t5-base",
+                "save_total_limit": 1,
+                # For glue datasets.
+                "split_validation_test": True,
+                "seed": 42,
+                "dataset_config_name": ["en"],
+                "eval_dataset_config_name": ["en"],
+                "test_dataset_config_name": ["en"],
+                # other configurations.
+                "predict_with_generate": True,
+                # To evaluate during training.
+                "load_best_model_at_end": True,
+                "metric_for_best_model": "average_metrics",
+                "greater_is_better": True,
+                "evaluation_strategy": "steps",
+                "overwrite_output_dir": True,
+                "push_to_hub": True,
+                "save_strategy": "steps"
+            }
+
+BaseConfigs['t5-large'] = {
+                ("job_name", "task_name", "eval_dataset_name", "test_dataset_name", "num_train_epochs", 
+                "max_source_length",
+                "per_device_train_batch_size", "per_device_eval_batch_size", "warmup_steps","save_steps", "eval_steps"): zip(
+                    ["superglue-boolq", "superglue-cb", "superglue-copa", "superglue-wic", "superglue-multirc", "superglue-record",
+                    "superglue-wsc.fixed", "mrpc", "cola", "sst2", "qnli", "rte",  "mnli", "qqp", "stsb"], 
+                    ["superglue-boolq", "superglue-cb", "superglue-copa", "superglue-wic", "superglue-multirc", "superglue-record", "superglue-wsc.fixed", "mrpc", "cola", "sst2", "qnli", "rte",  "mnli", "qqp", "stsb"], 
+                    ["superglue-boolq", "superglue-cb", "superglue-copa", "superglue-wic", "superglue-multirc", "superglue-record", "superglue-wsc.fixed", "mrpc", "cola", "sst2", "qnli", "rte",  "mnli", "qqp", "stsb"], 
+                    ["superglue-boolq", "superglue-cb", "superglue-copa", "superglue-wic", "superglue-multirc", "superglue-record", "superglue-wsc.fixed", "mrpc", "cola", "sst2", "qnli", "rte", "mnli", "qqp", "stsb"],
+                    [ 20,  20,  40,  20,   3,   3,  20,  20,  20,   3,   3,  20,   3,   3,  20],
+                    [256, 256, 256, 256, 256, 512, 256, 128, 128, 128, 128, 128, 128, 128, 128],
+                    # Train/eval batch sizes: the first seven (superglue-*) entries are fixed values,
+                    # while the remaining eight tasks (mrpc ... stsb) use the module-level BS.
+                    [ 32,  32,  32,  32,  32,  16,  32] + [BS] * 8,
+                    [ 32,  32,  32,  32,  32,  16,  32] + [BS] * 8,
+                    [0] *7 +[0] *8,
+                    [200, 100, 50, 100, 200, 200, 100, 200, 100, 200, 200, 100, 200, 200, 100],
+                    [200, 100, 50, 100, 200, 200, 100, 200, 100, 200, 200, 100, 200, 200, 100],
+                ),
+                "do_train": True,
+                "do_eval": True,
+                "do_test": True,
+                # NOTE(review): model/tokenizer are absolute paths under one user's home directory — confirm before reuse.
+                "model_name_or_path": "/home/hushengding/plm_cache/t5-large",
+                "tokenizer_name": "/home/hushengding/plm_cache/t5-large",
+                "save_total_limit": 1,
+                # For glue datasets.
+                "split_validation_test": True,
+                "seed": 42,
+                "dataset_config_name": ["en"],
+                "eval_dataset_config_name": ["en"],
+                "test_dataset_config_name": ["en"],
+                # other configurations.
+                "predict_with_generate": True,
+                # To evaluate during training.
+                "load_best_model_at_end": True,
+                "metric_for_best_model": "average_metrics",
+                "greater_is_better": True,
+                "evaluation_strategy": "steps",
+                "overwrite_output_dir": True,
+                "push_to_hub": True,
+                "save_strategy": "steps"
+            }
+
+BaseConfigs['t5-3b'] = {
+                ("job_name", "task_name", "eval_dataset_name", "test_dataset_name", "num_train_epochs", 
+                "max_source_length",
+                "per_device_train_batch_size", "per_device_eval_batch_size", "warmup_steps","save_steps", "eval_steps"): zip(
+                    ["superglue-boolq", "superglue-cb", "superglue-copa", "superglue-wic", "superglue-multirc", "superglue-record",
+                    "superglue-wsc.fixed", "mrpc", "cola", "sst2", "qnli", "rte",  "mnli", "qqp", "stsb"], 
+                    ["superglue-boolq", "superglue-cb", "superglue-copa", "superglue-wic", "superglue-multirc", "superglue-record", "superglue-wsc.fixed", "mrpc", "cola", "sst2", "qnli", "rte",  "mnli", "qqp", "stsb"], 
+                    ["superglue-boolq", "superglue-cb", "superglue-copa", "superglue-wic", "superglue-multirc", "superglue-record", "superglue-wsc.fixed", "mrpc", "cola", "sst2", "qnli", "rte",  "mnli", "qqp", "stsb"], 
+                    ["superglue-boolq", "superglue-cb", "superglue-copa", "superglue-wic", "superglue-multirc", "superglue-record", "superglue-wsc.fixed", "mrpc", "cola", "sst2", "qnli", "rte", "mnli", "qqp", "stsb"],
+                    [ 20,  20,  40,  20,   3,   3,  20,  20,  20,   3,   3,  20,   3,   3,  20],
+                    [256, 256, 256, 256, 256, 512, 256, 128, 128, 128, 128, 128, 128, 128, 128],
+                    # Train/eval batch sizes: the first seven (superglue-*) entries are fixed values,
+                    # while the remaining eight tasks (mrpc ... stsb) use the module-level BS.
+                    [ 32,  32,  32,  32,  32,  16,  32] + [BS] * 8,
+                    [ 32,  32,  32,  32,  32,  16,  32] + [BS] * 8,
+                    [0] *7 +[0] *8,
+                    [200, 100, 50, 100, 200, 200, 100, 200, 100, 200, 200, 100, 200, 200, 100],
+                    [200, 100, 50, 100, 200, 200, 100, 200, 100, 200, 200, 100, 200, 200, 100],
+                ),
+                "do_train": True,
+                "do_eval": True,
+                "do_test": True,
+                # NOTE(review): model/tokenizer are absolute paths under one user's home directory — confirm before reuse.
+                "model_name_or_path": "/home/hushengding/plm_cache/t5-3b",
+                "tokenizer_name": "/home/hushengding/plm_cache/t5-3b",
+                "save_total_limit": 1,
+                # For glue datasets.
+                "split_validation_test": True,
+                "seed": 42,
+                "dataset_config_name": ["en"],
+                "eval_dataset_config_name": ["en"],
+                "test_dataset_config_name": ["en"],
+                # other configurations.
+                "predict_with_generate": True,
+                # To evaluate during training.
+                "load_best_model_at_end": True,
+                "metric_for_best_model": "average_metrics",
+                "greater_is_better": True,
+                "evaluation_strategy": "steps",
+                "overwrite_output_dir": True,
+                "push_to_hub": True,
+                "save_strategy": "steps"
+            }
+
+AllConfigs['bitfit_t5-base'] = copy.deepcopy(BaseConfigs['t5-base'])
+AllConfigs['bitfit_t5-base'].update({
+                "delta_type": "bitfit",      
+                "learning_rate": 3e-4,         
+                "output_dir": "outputs/bitfit/t5-base/",
+            })
+
+# Every AllConfigs entry is a deep copy of a BaseConfigs template plus method-specific overrides.
+# "output_dir" is only a prefix here: the __main__ script appends the per-task job name to it.
+AllConfigs['adapter_t5-base'] = copy.deepcopy(BaseConfigs['t5-base'])
+AllConfigs['adapter_t5-base'].update({
+                                "delta_type": "adapter",
+                                "learning_rate": 3e-4,
+                                "unfrozen_modules": [
+                                    "deltas",
+                                    "layer_norm",
+                                    "final_layer_norm"
+                                ],
+                                "bottleneck_dim":24,
+                                "output_dir": "outputs/adapter/t5-base/",
+                            })
+
+AllConfigs['lora_t5-base'] = copy.deepcopy(BaseConfigs['t5-base'])
+AllConfigs['lora_t5-base'].update({
+                                "delta_type": "lora",
+                                "learning_rate": 3e-4,
+                                "unfrozen_modules": [
+                                    "deltas",
+                                    "layer_norm",
+                                    "final_layer_norm"
+                                ],
+                                "lora_r": 8,
+                                "output_dir": "outputs/lora/t5-base/",
+                            })
+
+AllConfigs['compacter_t5-base'] = copy.deepcopy(BaseConfigs['t5-base'])
+AllConfigs['compacter_t5-base'].update({
+                                "delta_type": "compacter",
+                                "learning_rate": 3e-3,
+                                "unfrozen_modules": [
+                                    "deltas",
+                                    "layer_norm",
+                                    "final_layer_norm"
+                                ],
+                                "output_dir": "outputs/compacter/t5-base/",
+                                "non_linearity": "gelu_new",
+
+                                # Compacter-specific settings.
+                                "hypercomplex_division": 4, 
+                                "hypercomplex_adapters": True,
+                                "hypercomplex_nonlinearity": "glorot-uniform",
+                                # gradient clip and clamp 
+                                "gradient_clip": False,
+                                "phm_clamp": False,
+                                "normalize_phm_weight": False, 
+                                "learn_phm": True,
+                                # shared one side 
+                                "factorized_phm": True, 
+                                "shared_phm_rule": False,
+                                "factorized_phm_rule": False,
+                                "phm_c_init": "normal",
+                                "phm_init_range": 0.0001,
+                                "use_bias_down_sampler": True,
+                                "use_bias_up_sampler": True,
+                            })
+# compacter++ repeats the compacter values, adds "modified_modules", restates do_train/do_eval/do_test, and has its own output_dir.
+AllConfigs['compacter++_t5-base'] = copy.deepcopy(BaseConfigs['t5-base'])
+AllConfigs['compacter++_t5-base'].update({
+                                "delta_type": "compacter",
+                                "learning_rate": 3e-3,
+                                "do_train": True,
+                                "do_eval": True,
+                                "do_test": True,
+                                "modified_modules": [
+                                    "DenseReluDense"
+                                ],
+                                "unfrozen_modules": [
+                                    "deltas",
+                                    "layer_norm",
+                                    "final_layer_norm"
+                                ],
+                                "output_dir": "outputs/compacter++/t5-base/",
+                                "non_linearity": "gelu_new",
+
+                                # Compacter-specific settings.
+                                "hypercomplex_division": 4, 
+                                "hypercomplex_adapters": True,
+                                "hypercomplex_nonlinearity": "glorot-uniform",
+                                # gradient clip and clamp 
+                                "gradient_clip": False,
+                                "phm_clamp": False,
+                                "normalize_phm_weight": False, 
+                                "learn_phm": True,
+                                # shared one side 
+                                "factorized_phm": True, 
+                                "shared_phm_rule": False,
+                                "factorized_phm_rule": False,
+                                "phm_c_init": "normal",
+                                "phm_init_range": 0.0001,
+                                "use_bias_down_sampler": True,
+                                "use_bias_up_sampler": True,
+                            })
+
+
+AllConfigs['low_rank_adapter_t5-base'] = copy.deepcopy(BaseConfigs['t5-base'])
+AllConfigs['low_rank_adapter_t5-base'].update({
+                                "delta_type": "low_rank_adapter",
+                                "learning_rate": 3e-4,
+                                "unfrozen_modules": [
+                                    "deltas",
+                                    "layer_norm",
+                                    "final_layer_norm"
+                                ],
+                                "output_dir": "outputs/low_rank_adapter/t5-base/",
+                                "non_linearity": "gelu_new",
+                                "low_rank_w_init": "glorot-uniform", 
+                                "low_rank_rank": 1,
+                            })
+
+# soft_prompt and prefix list only "deltas" in unfrozen_modules (no layer norms).
+AllConfigs['soft_prompt_t5-base'] = copy.deepcopy(BaseConfigs['t5-base'])
+AllConfigs['soft_prompt_t5-base'].update({
+                                "delta_type": "soft_prompt",
+                                "learning_rate": 3e-2,
+                                "soft_token_num":100,
+                                "token_init": False,
+                                "unfrozen_modules": [
+                                    "deltas",
+                                ],
+                                "output_dir": "outputs/soft_prompt/t5-base/",
+                            })
+
+AllConfigs['prefix_t5-base'] = copy.deepcopy(BaseConfigs['t5-base'])
+AllConfigs['prefix_t5-base'].update({
+                                "delta_type": "prefix",
+                                "learning_rate": 3e-4,
+                                "unfrozen_modules": [
+                                    "deltas",
+                                ],
+                                "output_dir": "outputs/prefix/t5-base/",
+                            })
+# delta_type "none": presumably the plain fine-tuning baseline without a delta module — TODO confirm.
+AllConfigs['none_t5-base'] = copy.deepcopy(BaseConfigs['t5-base'])
+AllConfigs['none_t5-base'].update({
+                                "delta_type": "none",
+                                "learning_rate": 3e-5,
+                                "output_dir": "outputs/none/t5-base/",
+                            })
+
+AllConfigs['bitfit_t5-large'] = copy.deepcopy(BaseConfigs['t5-large'])
+AllConfigs['bitfit_t5-large'].update({
+                "delta_type": "bitfit",      
+                "learning_rate": 3e-4,         
+                "output_dir": "outputs/bitfit/t5-large/",
+            })
+# Only bitfit, none, adapter and lora are defined for t5-large and t5-3b.
+AllConfigs['none_t5-large'] = copy.deepcopy(BaseConfigs['t5-large'])
+AllConfigs['none_t5-large'].update({
+                                "delta_type": "none",
+                                "learning_rate": 3e-5,
+                                "output_dir": "outputs/none/t5-large/",
+                            })
+
+# The t5-3b entries repeat the t5-large overrides; only the template and output_dir differ.
+AllConfigs['bitfit_t5-3b'] = copy.deepcopy(BaseConfigs['t5-3b'])
+AllConfigs['bitfit_t5-3b'].update({
+                "delta_type": "bitfit",      
+                "learning_rate": 3e-4,         
+                "output_dir": "outputs/bitfit/t5-3b/",
+            })
+
+AllConfigs['none_t5-3b'] = copy.deepcopy(BaseConfigs['t5-3b'])
+AllConfigs['none_t5-3b'].update({
+                                "delta_type": "none",
+                                "learning_rate": 3e-5,
+                                "output_dir": "outputs/none/t5-3b/",
+                            })
+
+AllConfigs['adapter_t5-3b'] = copy.deepcopy(BaseConfigs['t5-3b'])
+AllConfigs['adapter_t5-3b'].update({
+                                "delta_type": "adapter",
+                                "learning_rate": 3e-4,
+                                "unfrozen_modules": [
+                                    "deltas",
+                                    "layer_norm",
+                                    "final_layer_norm"
+                                ],
+                                "bottleneck_dim":24,
+                                "output_dir": "outputs/adapter/t5-3b/",
+                            })
+
+AllConfigs['adapter_t5-large'] = copy.deepcopy(BaseConfigs['t5-large'])
+AllConfigs['adapter_t5-large'].update({
+                                "delta_type": "adapter",
+                                "learning_rate": 3e-4,
+                                "unfrozen_modules": [
+                                    "deltas",
+                                    "layer_norm",
+                                    "final_layer_norm"
+                                ],
+                                "bottleneck_dim":24,
+                                "output_dir": "outputs/adapter/t5-large/",
+                            })
+
+AllConfigs['lora_t5-large'] = copy.deepcopy(BaseConfigs['t5-large'])
+AllConfigs['lora_t5-large'].update({
+                                "delta_type": "lora",
+                                "learning_rate": 3e-4,
+                                "unfrozen_modules": [
+                                    "deltas",
+                                    "layer_norm",
+                                    "final_layer_norm"
+                                ],
+                                "lora_r": 8,
+                                "output_dir": "outputs/lora/t5-large/",
+                            })
+
+AllConfigs['lora_t5-3b'] = copy.deepcopy(BaseConfigs['t5-3b'])
+AllConfigs['lora_t5-3b'].update({
+                                "delta_type": "lora",
+                                "learning_rate": 3e-4,
+                                "unfrozen_modules": [
+                                    "deltas",
+                                    "layer_norm",
+                                    "final_layer_norm"
+                                ],
+                                "lora_r": 8,
+                                "output_dir": "outputs/lora/t5-3b/",
+                            })
+
+                            
+if __name__ == "__main__":
+    # Expand one AllConfigs entry into one JSON config file per task.
+    import argparse
+    import json
+    import os
+    parser = argparse.ArgumentParser("Parser to generate configuration")
+    # Fail with a usage message on a missing or unknown job instead of a bare KeyError.
+    parser.add_argument("--job", type=str, required=True, choices=sorted(AllConfigs))
+    args = parser.parse_args()
+
+    config = AllConfigs[args.job]
+
+    # Tuple keys name per-task columns; their values yield one row per task.
+    column_keys = [key for key in config if isinstance(key, tuple)]
+    all_config_jsons = {}
+    for key_tuple in column_keys:
+        for row in config[key_tuple]:
+            # The first column of every row is the job name.
+            job_name = row[0]
+            # setdefault keeps fields already collected under another tuple key.
+            job_config = all_config_jsons.setdefault(job_name, {})
+            for key_name, value in zip(key_tuple, row):
+                if key_name != 'job_name':
+                    job_config[key_name] = value
+
+    # String keys are shared settings copied unchanged into every job.
+    for key, value in config.items():
+        if isinstance(key, tuple):
+            continue
+        for job_name, job_config in all_config_jsons.items():
+            # output_dir is a prefix; appending the job name makes it per-task.
+            job_config[key] = value + job_name if key == "output_dir" else value
+
+    # One output directory per (job, batch size); exist_ok makes re-runs safe.
+    out_dir = f"./{args.job}_{BS}/"
+    os.makedirs(out_dir, exist_ok=True)
+
+    # sort_keys gives every generated file the same key order.
+    for job_name, job_config in all_config_jsons.items():
+        with open(f"{out_dir}{job_name}.json", 'w') as fout:
+            json.dump(job_config, fout, indent=4, sort_keys=True)
\ No newline at end of file