update examples
This commit is contained in:
parent
0de3cbd31d
commit
42747f2b81
@ -25,7 +25,7 @@ log.txt
!examples/jupyter_notebook_examples/*.py
!**/examples/*/configs/config_gen.py
!**/examples/*/configs/*.py
**/outputs_search/**/*.bin
**/outputs_search/**/*.pt
@ -1 +0,0 @@
Subproject commit 058e5f25c898a1f956e3f17a0db6d62f08173e7f
@ -1 +0,0 @@
Subproject commit 3a5083d61e73bae607574a3047deafaa76b97646
@ -1,50 +0,0 @@
<!---
Copyright 2021 The HuggingFace Team. All rights reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->

# Use OpenDelta with the vision transformer (ViT)

This example adapts the [huggingface image classification examples](https://github.com/huggingface/transformers/tree/master/examples/pytorch/image-classification) by adding several lines to the original scripts.

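For reference, the gist of the added lines, as they appear in `run_image_classification.py` later in this diff, is the block that wraps the backbone with a delta model built from the JSON config:

```python
# Added on top of the original HuggingFace script: configure OpenDelta from the
# leftover JSON keys (e.g. configs/lora_beans.json) and freeze the backbone.
if delta_args.delta_type.lower() != "none":
    from opendelta import AutoDeltaConfig, AutoDeltaModel
    delta_config = AutoDeltaConfig.from_dict(vars(delta_args))
    delta_model = AutoDeltaModel.from_config(delta_config, backbone_model=model)
    delta_model.freeze_module(set_state_dict=True)
    delta_model.log(delta_ratio=True, trainable_ratio=True, visualization=True)
```
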
## Usage
### 1. Install the necessary packages
```shell
pip install Pillow
pip install torchvision
pip install transformers==4.16.2
pip install datasets==1.18.0
```

### 2. Run
```bash
python run_image_classification.py configs/lora_beans.json
```

Do not forget to reinstall datasets==1.17.0 afterwards for the other examples. :)


## Possible Errors
1. Dataset connection error

Solution 1: open a Python console and rerun the failing command; this may not always help.

Solution 2: download the dataset yourself on an internet-connected machine, save it to disk, transfer it to your server, and finally load it with `load_from_disk` (a sketch is given below).
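
A minimal sketch of Solution 2, assuming the `beans` dataset and an illustrative save path:

```python
# On a machine with internet access: download the dataset and save it to disk.
from datasets import load_dataset

load_dataset("beans").save_to_disk("./saved_datasets/beans")  # copy this folder to the server

# On the server: load the local copy instead of downloading.
from datasets import load_from_disk

ds = load_from_disk("./saved_datasets/beans")
```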

## Link to original training scripts
You may find answers to other questions about the scripts that are unrelated to OpenDelta at
https://github.com/huggingface/transformers/tree/master/examples/pytorch/image-classification
@ -1,30 +0,0 @@
{
    "report_to": "none",
    "dataset_name": "beans",
    "output_dir": "./beans_outputs/",
    "do_train": true,
    "do_eval": true,
    "num_train_epochs": 5,
    "remove_unused_columns": false,
    "per_device_train_batch_size": 8,
    "per_device_eval_batch_size": 8,
    "logging_strategy": "steps",
    "logging_steps": 10,
    "evaluation_strategy": "epoch",
    "save_strategy": "epoch",
    "load_best_model_at_end": true,
    "save_total_limit": 3,
    "seed": 1337,
    "delta_type": "lora",
    "modified_modules": [
        "attention.query",
        "attention.value"
    ],
    "unfrozen_modules": [
        "classifier",
        "deltas"
    ],
    "overwrite_output_dir": true,
    "learning_rate": 5e-4
}
@ -1,89 +0,0 @@
# coding=utf-8
# Copyright 2020 The HuggingFace Datasets Authors and the current dataset script contributor.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Accuracy metric."""

from sklearn.metrics import accuracy_score

import datasets


_DESCRIPTION = """
Accuracy is the proportion of correct predictions among the total number of cases processed. It can be computed with:
Accuracy = (TP + TN) / (TP + TN + FP + FN)
TP: True positive
TN: True negative
FP: False positive
FN: False negative
"""

_KWARGS_DESCRIPTION = """
Args:
    predictions: Predicted labels, as returned by a model.
    references: Ground truth labels.
    normalize: If False, return the number of correctly classified samples.
        Otherwise, return the fraction of correctly classified samples.
    sample_weight: Sample weights.
Returns:
    accuracy: Accuracy score.
Examples:

    >>> accuracy_metric = datasets.load_metric("accuracy")
    >>> results = accuracy_metric.compute(references=[0, 1], predictions=[0, 1])
    >>> print(results)
    {'accuracy': 1.0}
"""

_CITATION = """\
@article{scikit-learn,
  title={Scikit-learn: Machine Learning in {P}ython},
  author={Pedregosa, F. and Varoquaux, G. and Gramfort, A. and Michel, V.
          and Thirion, B. and Grisel, O. and Blondel, M. and Prettenhofer, P.
          and Weiss, R. and Dubourg, V. and Vanderplas, J. and Passos, A. and
          Cournapeau, D. and Brucher, M. and Perrot, M. and Duchesnay, E.},
  journal={Journal of Machine Learning Research},
  volume={12},
  pages={2825--2830},
  year={2011}
}
"""


@datasets.utils.file_utils.add_start_docstrings(_DESCRIPTION, _KWARGS_DESCRIPTION)
class Accuracy(datasets.Metric):
    def _info(self):
        return datasets.MetricInfo(
            description=_DESCRIPTION,
            citation=_CITATION,
            inputs_description=_KWARGS_DESCRIPTION,
            features=datasets.Features(
                {
                    "predictions": datasets.Sequence(datasets.Value("int32")),
                    "references": datasets.Sequence(datasets.Value("int32")),
                }
                if self.config_name == "multilabel"
                else {
                    "predictions": datasets.Value("int32"),
                    "references": datasets.Value("int32"),
                }
            ),
            reference_urls=["https://scikit-learn.org/stable/modules/generated/sklearn.metrics.accuracy_score.html"],
        )

    def _compute(self, predictions, references, normalize=True, sample_weight=None):
        return {
            "accuracy": float(
                accuracy_score(references, predictions, normalize=normalize, sample_weight=sample_weight)
            )
        }
@ -1,3 +0,0 @@
# torch>=1.5.0
torchvision>=0.6.0
datasets>=1.8.0
@ -1,392 +0,0 @@
#!/usr/bin/env python
# coding=utf-8
# Copyright 2021 The HuggingFace Inc. team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import logging
import os
import sys
from dataclasses import dataclass, field
from typing import Optional

import datasets
import numpy as np
import torch
from datasets import load_dataset
from PIL import Image
from torchvision.transforms import (
    CenterCrop,
    Compose,
    Normalize,
    RandomHorizontalFlip,
    RandomResizedCrop,
    Resize,
    ToTensor,
)

import transformers
from transformers import (
    MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING,
    AutoConfig,
    AutoFeatureExtractor,
    AutoModelForImageClassification,
    HfArgumentParser,
    Trainer,
    TrainingArguments,
)
from transformers.trainer_utils import get_last_checkpoint
from transformers.utils import check_min_version
from transformers.utils.versions import require_version


"""Fine-tuning a 🤗 Transformers model for image classification"""

logger = logging.getLogger(__name__)

# Will error if the minimal version of Transformers is not installed. Remove at your own risk.
check_min_version("4.16.0.dev0")

require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/image-classification/requirements.txt")

MODEL_CONFIG_CLASSES = list(MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING.keys())
MODEL_TYPES = tuple(conf.model_type for conf in MODEL_CONFIG_CLASSES)


def pil_loader(path: str):
    with open(path, "rb") as f:
        im = Image.open(f)
        return im.convert("RGB")


@dataclass
class DataTrainingArguments:
    """
    Arguments pertaining to what data we are going to input our model for training and eval.
    Using ``HfArgumentParser`` we can turn this class
    into argparse arguments to be able to specify them on
    the command line.
    """

    dataset_name: Optional[str] = field(
        default="nateraw/image-folder", metadata={"help": "Name of a dataset from the datasets package"}
    )
    dataset_config_name: Optional[str] = field(
        default=None, metadata={"help": "The configuration name of the dataset to use (via the datasets library)."}
    )
    train_dir: Optional[str] = field(default=None, metadata={"help": "A folder containing the training data."})
    validation_dir: Optional[str] = field(default=None, metadata={"help": "A folder containing the validation data."})
    train_val_split: Optional[float] = field(
        default=0.15, metadata={"help": "Percent to split off of train for validation."}
    )
    max_train_samples: Optional[int] = field(
        default=None,
        metadata={
            "help": "For debugging purposes or quicker training, truncate the number of training examples to this "
            "value if set."
        },
    )
    max_eval_samples: Optional[int] = field(
        default=None,
        metadata={
            "help": "For debugging purposes or quicker training, truncate the number of evaluation examples to this "
            "value if set."
        },
    )

    def __post_init__(self):
        data_files = dict()
        if self.train_dir is not None:
            data_files["train"] = self.train_dir
        if self.validation_dir is not None:
            data_files["val"] = self.validation_dir
        self.data_files = data_files if data_files else None


class RemainArgHfArgumentParser(HfArgumentParser):
    def parse_json_file(self, json_file: str, return_remaining_args=True):
        """
        Alternative helper method that does not use `argparse` at all, instead loading a json file and populating the
        dataclass types.
        """
        import argparse
        import json
        from pathlib import Path
        import dataclasses

        data = json.loads(Path(json_file).read_text())
        outputs = []
        for dtype in self.dataclass_types:
            keys = {f.name for f in dataclasses.fields(dtype) if f.init}
            inputs = {k: data.pop(k) for k in list(data.keys()) if k in keys}
            obj = dtype(**inputs)
            outputs.append(obj)

        remain_args = argparse.ArgumentParser()
        remain_args.__dict__.update(data)
        if return_remaining_args:
            return (*outputs, remain_args)
        else:
            return (*outputs,)


@dataclass
class ModelArguments:
    """
    Arguments pertaining to which model/config/tokenizer we are going to fine-tune from.
    """

    model_name_or_path: str = field(
        default="google/vit-base-patch16-224-in21k",
        metadata={"help": "Path to pretrained model or model identifier from huggingface.co/models"},
    )
    model_type: Optional[str] = field(
        default=None,
        metadata={"help": "If training from scratch, pass a model type from the list: " + ", ".join(MODEL_TYPES)},
    )
    config_name: Optional[str] = field(
        default=None, metadata={"help": "Pretrained config name or path if not the same as model_name"}
    )
    cache_dir: Optional[str] = field(
        default=None, metadata={"help": "Where do you want to store the pretrained models downloaded from s3"}
    )
    model_revision: str = field(
        default="main",
        metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."},
    )
    feature_extractor_name: str = field(default=None, metadata={"help": "Name or path of preprocessor config."})
    use_auth_token: bool = field(
        default=False,
        metadata={
            "help": "Will use the token generated when running `transformers-cli login` (necessary to use this script "
            "with private models)."
        },
    )


def collate_fn(examples):
    pixel_values = torch.stack([example["pixel_values"] for example in examples])
    labels = torch.tensor([example["labels"] for example in examples])
    return {"pixel_values": pixel_values, "labels": labels}


def main():
    # See all possible arguments in src/transformers/training_args.py
    # or by passing the --help flag to this script.
    # We now keep distinct sets of args, for a cleaner separation of concerns.

    parser = RemainArgHfArgumentParser((ModelArguments, DataTrainingArguments, TrainingArguments))
    if len(sys.argv) == 2 and sys.argv[1].endswith(".json"):
        # If we pass only one argument to the script and it's the path to a json file,
        # let's parse it to get our arguments.
        model_args, data_args, training_args, delta_args = parser.parse_json_file(json_file=os.path.abspath(sys.argv[1]))
    else:
        model_args, data_args, training_args, delta_args = parser.parse_args_into_dataclasses()

    # Setup logging
    logging.basicConfig(
        format="%(asctime)s - %(levelname)s - %(name)s - %(message)s",
        datefmt="%m/%d/%Y %H:%M:%S",
        handlers=[logging.StreamHandler(sys.stdout)],
    )

    log_level = training_args.get_process_log_level()
    logger.setLevel(log_level)
    transformers.utils.logging.set_verbosity(log_level)
    transformers.utils.logging.enable_default_handler()
    transformers.utils.logging.enable_explicit_format()

    # Log on each process the small summary:
    logger.warning(
        f"Process rank: {training_args.local_rank}, device: {training_args.device}, n_gpu: {training_args.n_gpu}, "
        + f"distributed training: {bool(training_args.local_rank != -1)}, 16-bits training: {training_args.fp16}"
    )
    logger.info(f"Training/evaluation parameters {training_args}")

    # Detecting last checkpoint.
    last_checkpoint = None
    if os.path.isdir(training_args.output_dir) and training_args.do_train and not training_args.overwrite_output_dir:
        last_checkpoint = get_last_checkpoint(training_args.output_dir)
        if last_checkpoint is None and len(os.listdir(training_args.output_dir)) > 0:
            raise ValueError(
                f"Output directory ({training_args.output_dir}) already exists and is not empty. "
                "Use --overwrite_output_dir to overcome."
            )
        elif last_checkpoint is not None and training_args.resume_from_checkpoint is None:
            logger.info(
                f"Checkpoint detected, resuming training at {last_checkpoint}. To avoid this behavior, change "
                "the `--output_dir` or add `--overwrite_output_dir` to train from scratch."
            )

    # Initialize our dataset and prepare it for the 'image-classification' task.
    ds = load_dataset(
        data_args.dataset_name,
        data_args.dataset_config_name,
        data_files=data_args.data_files,
        cache_dir=model_args.cache_dir,
        task="image-classification",
    )
    # If you encounter an error here, try to download the dataset yourself and load it from disk,
    # as in the following two lines:
    # from datasets import load_from_disk
    # ds = load_from_disk(f"../../../../huggingface_datasets/saved_to_disk/{data_args.dataset_name}")

    # If we don't have a validation split, split off a percentage of train as validation.
    data_args.train_val_split = None if "validation" in ds.keys() else data_args.train_val_split
    if isinstance(data_args.train_val_split, float) and data_args.train_val_split > 0.0:
        split = ds["train"].train_test_split(data_args.train_val_split)
        ds["train"] = split["train"]
        ds["validation"] = split["test"]

    # Prepare label mappings.
    # We'll include these in the model's config to get human readable labels in the Inference API.
    labels = ds["train"].features["labels"].names
    label2id, id2label = dict(), dict()
    for i, label in enumerate(labels):
        label2id[label] = str(i)
        id2label[str(i)] = label

    # Load the accuracy metric from the datasets package
    # metric = datasets.load_metric("accuracy")
    metric = datasets.load_metric("metric.py")

    # Define our compute_metrics function. It takes an ``EvalPrediction`` object (a namedtuple with a
    # predictions and label_ids field) and has to return a dictionary string to float.
    def compute_metrics(p):
        """Computes accuracy on a batch of predictions"""
        return metric.compute(predictions=np.argmax(p.predictions, axis=1), references=p.label_ids)

    config = AutoConfig.from_pretrained(
        model_args.config_name or model_args.model_name_or_path,
        num_labels=len(labels),
        label2id=label2id,
        id2label=id2label,
        finetuning_task="image-classification",
        cache_dir=model_args.cache_dir,
        revision=model_args.model_revision,
        use_auth_token=True if model_args.use_auth_token else None,
    )
    model = AutoModelForImageClassification.from_pretrained(
        model_args.model_name_or_path,
        from_tf=bool(".ckpt" in model_args.model_name_or_path),
        config=config,
        cache_dir=model_args.cache_dir,
        revision=model_args.model_revision,
        use_auth_token=True if model_args.use_auth_token else None,
    )
    feature_extractor = AutoFeatureExtractor.from_pretrained(
        model_args.feature_extractor_name or model_args.model_name_or_path,
        cache_dir=model_args.cache_dir,
        revision=model_args.model_revision,
        use_auth_token=True if model_args.use_auth_token else None,
    )

    if delta_args.delta_type.lower() != "none":
        from opendelta import AutoDeltaConfig, AutoDeltaModel
        delta_config = AutoDeltaConfig.from_dict(vars(delta_args))
        delta_model = AutoDeltaModel.from_config(delta_config, backbone_model=model)
        delta_model.freeze_module(set_state_dict=True)
        delta_model.log(delta_ratio=True, trainable_ratio=True, visualization=True)

    # Define torchvision transforms to be applied to each image.
    normalize = Normalize(mean=feature_extractor.image_mean, std=feature_extractor.image_std)
    _train_transforms = Compose(
        [
            RandomResizedCrop(feature_extractor.size),
            RandomHorizontalFlip(),
            ToTensor(),
            normalize,
        ]
    )
    _val_transforms = Compose(
        [
            Resize(feature_extractor.size),
            CenterCrop(feature_extractor.size),
            ToTensor(),
            normalize,
        ]
    )

    def train_transforms(example_batch):
        """Apply _train_transforms across a batch."""
        example_batch["pixel_values"] = [
            _train_transforms(pil_img.convert("RGB")) for pil_img in example_batch["image"]
        ]
        return example_batch

    def val_transforms(example_batch):
        """Apply _val_transforms across a batch."""
        example_batch["pixel_values"] = [_val_transforms(pil_img.convert("RGB")) for pil_img in example_batch["image"]]
        return example_batch

    if training_args.do_train:
        if "train" not in ds:
            raise ValueError("--do_train requires a train dataset")
        if data_args.max_train_samples is not None:
            ds["train"] = ds["train"].shuffle(seed=training_args.seed).select(range(data_args.max_train_samples))
        # Set the training transforms
        ds["train"].set_transform(train_transforms)

    if training_args.do_eval:
        if "validation" not in ds:
            raise ValueError("--do_eval requires a validation dataset")
        if data_args.max_eval_samples is not None:
            ds["validation"] = (
                ds["validation"].shuffle(seed=training_args.seed).select(range(data_args.max_eval_samples))
            )
        # Set the validation transforms
        ds["validation"].set_transform(val_transforms)

    # Initialize our trainer
    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=ds["train"] if training_args.do_train else None,
        eval_dataset=ds["validation"] if training_args.do_eval else None,
        compute_metrics=compute_metrics,
        tokenizer=feature_extractor,
        data_collator=collate_fn,
    )

    # Training
    if training_args.do_train:
        checkpoint = None
        if training_args.resume_from_checkpoint is not None:
            checkpoint = training_args.resume_from_checkpoint
        elif last_checkpoint is not None:
            checkpoint = last_checkpoint
        train_result = trainer.train(resume_from_checkpoint=checkpoint)
        trainer.save_model()
        trainer.log_metrics("train", train_result.metrics)
        trainer.save_metrics("train", train_result.metrics)
        trainer.save_state()

    # Evaluation
    if training_args.do_eval:
        metrics = trainer.evaluate()
        trainer.log_metrics("eval", metrics)
        trainer.save_metrics("eval", metrics)

    # Write model card and (optionally) push to hub
    kwargs = {
        "finetuned_from": model_args.model_name_or_path,
        "tasks": "image-classification",
        "dataset": data_args.dataset_name,
        "tags": ["image-classification"],
    }
    if training_args.push_to_hub:
        trainer.push_to_hub(**kwargs)
    else:
        trainer.create_model_card(**kwargs)


if __name__ == "__main__":
    main()
@ -0,0 +1,145 @@
from openpromptu.data_utils import InputExample
import torch
from transformers.data.data_collator import torch_default_data_collator
from transformers.data.data_collator import DataCollatorMixin as HfDataCollatorMixin
import numpy as np
from transformers import (
    AutoConfig,
    AutoFeatureExtractor,
    AutoModelForImageClassification,
)
from transformers import ViTFeatureExtractor

from transformers import Trainer as HfTrainer
import torch.nn as nn


def process_example(raw_example, **kwargs):
    tokenizer = kwargs['tokenizer']
    inputs = tokenizer(raw_example['image'], return_tensors='pt')
    inputs['labels'] = raw_example['labels']
    return inputs


def get_prompts(task, tokenizer, data_args, template_id="0", verbalizer_id="0"):
    # from openpromptu.prompts import ManualVerbalizer
    # from openpromptu.prompts import ManualTemplate
    # from openpromptu import TokenizerWrapper
    # template = ManualTemplate(text=task.templates_text[template_id])
    # verbalizer = ManualVerbalizer(tokenizer=tokenizer, classes=task.labels_list, label_words=task.verbalizers[verbalizer_id])
    # tokenizer_wrapper = TokenizerWrapper(max_seq_length=data_args.max_source_length, tokenizer=tokenizer, truncate_method="balanced", mask_token_func=mask_token_func)
    return None, None, None


def preprocess_function(raw_example, **kwargs):
    tokenizer = kwargs['tokenizer']
    model_inputs = tokenizer(raw_example['image'], return_tensors='pt')
    model_inputs['pixel_values'] = model_inputs['pixel_values'].squeeze()
    model_inputs['labels'] = raw_example['labels']
    return model_inputs


def compute_metrics(eval_preds, dataset_name, eval_metric):
    preds, labels = eval_preds.predictions, eval_preds.label_ids

    preds = np.argmax(preds, axis=-1)

    result = {}
    average_metrics = []
    for metric in eval_metric:
        metric_item = metric(preds, labels)
        metric_value = list(metric_item.values())
        result.update(metric_item)
        average_metrics.extend(metric_value)
    print("average:", average_metrics)
    average_metric = sum(average_metrics) / len(average_metrics)
    result.update({"average_metrics": average_metric})
    return result


def mask_token_func(tokenizer, ith_mask=0):
    return tokenizer.mask_token


def get_remove_columns(dataset_features):
    # dataset_features.pop("label")
    print("remove_columns: {}".format(dataset_features))
    return dataset_features


class DataCollator(HfDataCollatorMixin):
    def __init__(self, *args, **kwargs):
        self.return_tensors = 'pt'

    def torch_call(self, features):
        return torch_default_data_collator(features=features)


def get_backbone(model_args, **kwargs):
    config = AutoConfig.from_pretrained(
        model_args.config_name if model_args.config_name else model_args.model_name_or_path,
        cache_dir=model_args.cache_dir,
        revision=model_args.model_revision,
        use_auth_token=True if model_args.use_auth_token else None,
    )
    config.dropout_rate = 0.0
    tokenizer = AutoFeatureExtractor.from_pretrained(
        model_args.tokenizer_name if model_args.tokenizer_name else model_args.model_name_or_path,
        cache_dir=model_args.cache_dir,
        use_fast=model_args.use_fast_tokenizer,
        revision=model_args.model_revision,
        use_auth_token=True if model_args.use_auth_token else None,
    )

    model = AutoModelForImageClassification.from_pretrained(
        model_args.model_name_or_path,
        from_tf=bool(".ckpt" in model_args.model_name_or_path),
        config=config,
        cache_dir=model_args.cache_dir,
        revision=model_args.model_revision,
        use_auth_token=True if model_args.use_auth_token else None,
    )
    # Replace the pretrained classification head with a freshly initialized one
    # sized for the target dataset.
    config.num_labels = model_args.num_classes
    old_classifier = model.classifier
    model.classifier = nn.Linear(old_classifier.in_features, config.num_labels)

    return config, tokenizer, model


class Trainer(HfTrainer):
    def __init__(self, verbalizer=None, eval_task=None, **kwargs):
        super().__init__(**kwargs)
        self.verbalizer = verbalizer
        self.eval_task = eval_task
        self.compute_metrics = self._compute_metrics
        self.loss_fn = nn.CrossEntropyLoss()

    def compute_loss(self, model, inputs, return_outputs=False):
        labels = inputs.pop('labels')
        outputs = model(**inputs)
        logits = outputs.get("logits")

        loss = self.loss_fn(logits, labels)
        return (loss, outputs) if return_outputs else loss

    def _compute_metrics(self, eval_preds):
        preds, labels = eval_preds.predictions, eval_preds.label_ids

        preds = np.argmax(preds, axis=-1)

        result = {}
        average_metrics = []
        for metric in self.eval_task.metric:
            metric_item = metric(preds, labels)
            metric_value = list(metric_item.values())
            result.update(metric_item)
            average_metrics.extend(metric_value)
        print("average:", average_metrics)
        average_metric = sum(average_metrics) / len(average_metrics)
        result.update({"average_metrics": average_metric})
        # from IPython import embed; embed(header="In compute metrics")  # debug hook disabled
        return result
@ -0,0 +1,141 @@
from openpromptu.data_utils import InputExample
import torch
from transformers.data.data_collator import torch_default_data_collator
from transformers.data.data_collator import DataCollatorMixin as HfDataCollatorMixin
import numpy as np
from transformers import (
    AutoConfig,
    AutoModelForMaskedLM,
    AutoTokenizer,
)

from transformers import Trainer as HfTrainer


def preprocess_function(raw_example, **kwargs):
    tokenizer = kwargs['tokenizer']
    data_args = kwargs['data_args']
    template = kwargs['template']
    verbalizer = kwargs['verbalizer']
    tokenizer_wrapper = kwargs['tokenizer_wrapper']

    example = InputExample(**raw_example)
    example, other = template.wrap_one_example(example)
    input_sentence = tokenizer_wrapper.merge_wrapped_example(example)
    model_inputs = tokenizer(input_sentence, max_length=data_args.max_source_length,
                             padding="max_length", truncation=True)
    return model_inputs


def compute_metrics(eval_preds, dataset_name, eval_metric):
    preds, labels = eval_preds.predictions, eval_preds.label_ids

    preds = np.argmax(preds, axis=-1)

    result = {}
    average_metrics = []
    for metric in eval_metric:
        metric_item = metric(preds, labels)
        metric_value = list(metric_item.values())
        result.update(metric_item)
        average_metrics.extend(metric_value)
    print("average:", average_metrics)
    average_metric = sum(average_metrics) / len(average_metrics)
    result.update({"average_metrics": average_metric})
    return result


def mask_token_func(tokenizer, ith_mask=0):
    return tokenizer.mask_token


def get_remove_columns(dataset_features):
    dataset_features.pop("label")
    return dataset_features


def get_prompts(task, tokenizer, data_args, template_id="0", verbalizer_id="0"):
    from openpromptu.prompts import ManualVerbalizer
    from openpromptu.prompts import ManualTemplate
    from openpromptu import TokenizerWrapper
    template = ManualTemplate(text=task.templates_text[template_id])
    verbalizer = ManualVerbalizer(tokenizer=tokenizer, classes=task.labels_list, label_words=task.verbalizers[verbalizer_id])
    tokenizer_wrapper = TokenizerWrapper(max_seq_length=data_args.max_source_length, tokenizer=tokenizer, truncate_method="balanced", mask_token_func=mask_token_func)
    return template, verbalizer, tokenizer_wrapper


class DataCollator(HfDataCollatorMixin):
    def __init__(self, *args, **kwargs):
        self.return_tensors = 'pt'

    def torch_call(self, features):
        return torch_default_data_collator(features=features)


def get_backbone(model_args, **kwargs):
    config = AutoConfig.from_pretrained(
        model_args.config_name if model_args.config_name else model_args.model_name_or_path,
        cache_dir=model_args.cache_dir,
        revision=model_args.model_revision,
        use_auth_token=True if model_args.use_auth_token else None,
    )
    config.dropout_rate = 0.0
    tokenizer = AutoTokenizer.from_pretrained(
        model_args.tokenizer_name if model_args.tokenizer_name else model_args.model_name_or_path,
        cache_dir=model_args.cache_dir,
        use_fast=model_args.use_fast_tokenizer,
        revision=model_args.model_revision,
        use_auth_token=True if model_args.use_auth_token else None,
    )
    model = AutoModelForMaskedLM.from_pretrained(
        model_args.model_name_or_path,
        from_tf=bool(".ckpt" in model_args.model_name_or_path),
        config=config,
        cache_dir=model_args.cache_dir,
        revision=model_args.model_revision,
        use_auth_token=True if model_args.use_auth_token else None,
    )

    model.resize_token_embeddings(len(tokenizer))
    return config, tokenizer, model


class Trainer(HfTrainer):
    def __init__(self, verbalizer=None, eval_task=None, **kwargs):
        super().__init__(**kwargs)
        self.verbalizer = verbalizer
        self.eval_task = eval_task
        self.compute_metrics = self._compute_metrics

    def compute_loss(self, model, inputs, return_outputs=False):
        labels = inputs.pop('labels')
        outputs = model(**inputs)
        logits = outputs.get("logits")
        input_ids = inputs['input_ids']
        verbalizer = self.verbalizer.cuda()
        logits_at_mask = logits[torch.where(input_ids == verbalizer.tokenizer.mask_token_id)]
        label_logits = verbalizer.process_logits(logits_at_mask)
        loss_fct = torch.nn.CrossEntropyLoss()
        loss = loss_fct(label_logits, labels)
        outputs.logits = label_logits
        return (loss, outputs) if return_outputs else loss

    def _compute_metrics(self, eval_preds):
        preds, labels = eval_preds.predictions, eval_preds.label_ids

        preds = np.argmax(preds, axis=-1)

        result = {}
        average_metrics = []
        for metric in self.eval_task.metric:
            metric_item = metric(preds, labels)
            metric_value = list(metric_item.values())
            result.update(metric_item)
            average_metrics.extend(metric_value)
        print("average:", average_metrics)
        average_metric = sum(average_metrics) / len(average_metrics)
        result.update({"average_metrics": average_metric})
        return result
@ -0,0 +1,178 @@
from openpromptu.data_utils import InputExample
from transformers import Seq2SeqTrainer as HfSeq2SeqTrainer
from transformers import (
    AutoConfig,
    AutoModelForSeq2SeqLM,
    AutoTokenizer,
)
from transformers.data.data_collator import DataCollatorForSeq2Seq as DataCollator
import torch


def mask_token_func(tokenizer, ith_mask):
    return tokenizer.additional_special_tokens[ith_mask]


def get_remove_columns(dataset_features):
    return dataset_features


def preprocess_function(raw_example, **kwargs):
    # max_target_length += 1
    tokenizer = kwargs['tokenizer']
    data_args = kwargs['data_args']
    template = kwargs['template']
    verbalizer = kwargs['verbalizer']
    tokenizer_wrapper = kwargs['tokenizer_wrapper']
    split = kwargs['split']
    example = InputExample(**raw_example)

    try:
        example = verbalizer.wrap_one_example(example)
        example, other = template.wrap_one_example(example)
        input_sentence = tokenizer_wrapper.merge_wrapped_example(example)
        model_inputs = tokenizer(input_sentence, max_length=256,
                                 padding="max_length", truncation=True)
    except Exception:
        # from IPython import embed; embed(header="Therer")  # debug hook disabled
        raise

    with tokenizer.as_target_tokenizer():
        label = tokenizer(other['tgt_text']).input_ids

    model_inputs["labels"] = label
    return model_inputs


def get_backbone(model_args, **kwargs):
    config = AutoConfig.from_pretrained(
        model_args.config_name if model_args.config_name else model_args.model_name_or_path,
        cache_dir=model_args.cache_dir,
        revision=model_args.model_revision,
        use_auth_token=True if model_args.use_auth_token else None,
    )
    config.dropout_rate = 0.0
    tokenizer = AutoTokenizer.from_pretrained(
        model_args.tokenizer_name if model_args.tokenizer_name else model_args.model_name_or_path,
        cache_dir=model_args.cache_dir,
        use_fast=model_args.use_fast_tokenizer,
        revision=model_args.model_revision,
        use_auth_token=True if model_args.use_auth_token else None,
    )

    model = AutoModelForSeq2SeqLM.from_pretrained(
        model_args.model_name_or_path,
        from_tf=bool(".ckpt" in model_args.model_name_or_path),
        config=config,
        cache_dir=model_args.cache_dir,
        revision=model_args.model_revision,
        use_auth_token=True if model_args.use_auth_token else None,
    )
    return config, tokenizer, model


def get_prompts(task, tokenizer, data_args, template_id="0", verbalizer_id="0"):
    from openpromptu.prompts import GenerationVerbalizer
    from openpromptu.prompts import ManualTemplate
    from openpromptu import TokenizerWrapper
    template = ManualTemplate(text=task.templates_text[template_id])
    verbalizer = GenerationVerbalizer(tokenizer=tokenizer, classes=task.labels_list, label_words=task.verbalizers[verbalizer_id])
    tokenizer_wrapper = TokenizerWrapper(max_seq_length=data_args.max_source_length, tokenizer=tokenizer, truncate_method="balanced", mask_token_func=mask_token_func)
    return template, verbalizer, tokenizer_wrapper


class Trainer(HfSeq2SeqTrainer):
    def __init__(self, verbalizer=None, eval_task=None, **kwargs):
        super().__init__(**kwargs)
        self.eval_task = eval_task
        self.compute_metrics = self._compute_metrics

    def compute_loss(self, model, inputs, return_outputs=False):
        outputs = model(**inputs)
        if return_outputs:
            return (outputs.loss, outputs)
        else:
            return outputs.loss

    def prediction_step(
        self,
        model,  # nn.Module,
        inputs,  # Dict[str, Union[torch.Tensor, Any]],
        prediction_loss_only,  # bool,
        ignore_keys,  # Optional[List[str]] = None,
    ):  # -> Tuple[Optional[float], Optional[torch.Tensor], Optional[torch.Tensor]]:
        """
        Perform an evaluation step on :obj:`model` using :obj:`inputs`.

        Subclass and override to inject custom behavior.

        Args:
            model (:obj:`nn.Module`):
                The model to evaluate.
            inputs (:obj:`Dict[str, Union[torch.Tensor, Any]]`):
                The inputs and targets of the model.

                The dictionary will be unpacked before being fed to the model. Most models expect the targets under the
                argument :obj:`labels`. Check your model's documentation for all accepted arguments.
            prediction_loss_only (:obj:`bool`):
                Whether or not to return the loss only.

        Return:
            Tuple[Optional[float], Optional[torch.Tensor], Optional[torch.Tensor]]: A tuple with the loss, logits and
            labels (each being optional).
        """
        if not self.args.predict_with_generate or prediction_loss_only:
            return super().prediction_step(
                model, inputs, prediction_loss_only=prediction_loss_only, ignore_keys=ignore_keys
            )

        has_labels = "labels" in inputs
        inputs = self._prepare_inputs(inputs)
        gen_kwargs = {
            "max_length": 10,  # self._max_length if it is not None else self.model.config.max_length,
            "num_beams": 1,  # self._num_beams if self._num_beams is not None else self.model.config.num_beams,
        }
        generated_tokens = self.model.generate(
            inputs["input_ids"],
            attention_mask=inputs["attention_mask"],
            **gen_kwargs,
        )
        # in case the batch is shorter than max length, the output should be padded
        if generated_tokens.shape[-1] < gen_kwargs["max_length"]:
            generated_tokens = self._pad_tensors_to_max_len(generated_tokens, gen_kwargs["max_length"])

        with torch.no_grad():
            outputs = model(**inputs)
            if has_labels:
                if self.label_smoother is not None:
                    loss = self.label_smoother(outputs, inputs["labels"]).mean().detach()
                else:
                    loss = (outputs["loss"] if isinstance(outputs, dict) else outputs[0]).mean().detach()
            else:
                loss = None

        if self.args.prediction_loss_only:
            return (loss, None, None)

        labels = inputs["labels"]
        if labels.shape[-1] < gen_kwargs["max_length"]:
            labels = self._pad_tensors_to_max_len(labels, gen_kwargs["max_length"])

        return (loss, generated_tokens, labels)

    def _compute_metrics(self, eval_preds):
        preds, labels = eval_preds
        decoded_preds = self.tokenizer.batch_decode(preds, skip_special_tokens=True)
        decoded_labels = self.tokenizer.batch_decode(labels, skip_special_tokens=True)
        # post_processor = .get(data_args.dataset_name[0], tokenizer,
        #                       data_args.ignore_pad_token_for_loss)
        # decoded_preds, decoded_labels = post_processor.process(preds, labels, data_info)
        result = {}
        for metric in self.eval_task.metric:
            result.update(metric(decoded_preds, decoded_labels))

        average_metric = sum(result.values()) / len(result)
        result.update({"average_metrics": average_metric})
        return result
@ -1,28 +0,0 @@
# import numpy as np
# from dataclasses import dataclass
# from transformers import DataCollatorForSeq2Seq


# @dataclass
# class TaskDataCollatorForSeq2Seq(DataCollatorForSeq2Seq):
#     def check_uniqueness(self, samples):
#         assert len(np.unique(samples)) == 1

#     def __call__(self, features):
#         # tasks = [d.pop('task') for d in features]
#         # self.check_uniqueness(tasks)
#         output = super().__call__(features)
#         # output["task"] = tasks[0]
#         return output

# # class CustomDataCollator(DefaultDataCollator):
# #     def check_uniqueness(self, samples):
# #         assert len(np.unique(samples)) == 1

# #     def __call__(self, features):
# #         mask_positions = [d.pop('mask_positions') for d in features]
# #         # self.check_uniqueness(tasks)
# #         output = super().__call__(features)

# #         # output["task"] = tasks[0]
# #         return output
@ -1,67 +0,0 @@
import abc
from collections import OrderedDict
import numpy as np

"""Defines functions to process the outputs to make them ready for the evaluation."""

def string_to_float(string, default=-1., **unused_kwargs):
    """Converts string to float, using default when conversion not possible."""
    try:
        return float(string)
    except ValueError:
        return default


class PostProcessor(abc.ABC):
    """Postprocess the predictions and labels to make them suitable for
    evaluation."""
    def __init__(self, tokenizer, ignore_pad_token_for_loss):
        self.tokenizer = tokenizer
        self.ignore_pad_token_for_loss = ignore_pad_token_for_loss

    def process(self, preds, labels, data_info=None):
        if isinstance(preds, tuple):
            preds = preds[0]
        decoded_preds = self.tokenizer.batch_decode(preds, skip_special_tokens=True)
        if self.ignore_pad_token_for_loss:
            # Replace -100 in the labels as we can't decode them.
            labels = np.where(labels != -100, labels, self.tokenizer.pad_token_id)
        decoded_labels = self.tokenizer.batch_decode(labels, skip_special_tokens=True)
        # Some simple post-processing
        decoded_preds = [pred.strip() for pred in decoded_preds]
        decoded_labels = [label.strip() for label in decoded_labels]
        return decoded_preds, decoded_labels


class MultiRC(PostProcessor):
    def process(self, preds, labels, data_info):
        preds, labels = super().process(preds, labels, data_info)
        preds = [{"group": info["group"], "value": pred}
                 for info, pred in zip(data_info, preds)]
        labels = [{"group": info["group"], "value": label}
                  for info, label in zip(data_info, labels)]
        return preds, labels

class Record(PostProcessor):
    def process(self, preds, labels, data_info):
        preds, labels = super().process(preds, labels, data_info)
        labels = [info["answers"] for info in data_info]
        return preds, labels


POSTPROCESSOR_MAPPING = OrderedDict(
    [
        ('superglue-record', Record),
        ('superglue-multirc', MultiRC)
    ]
)

class AutoPostProcessor:
    @classmethod
    def get(self, task, tokenizer, ignore_pad_token_for_loss):
        if task in POSTPROCESSOR_MAPPING:
            return POSTPROCESSOR_MAPPING[task](tokenizer, ignore_pad_token_for_loss)
        return PostProcessor(tokenizer, ignore_pad_token_for_loss)
@ -0,0 +1,96 @@
import abc
from typing import Callable, List, Mapping, Dict
import datasets
import logging
import numpy as np
import torch

logger = logging.getLogger(__name__)


class AbstractTask(abc.ABC):
    name = NotImplemented
    config = NotImplemented
    prefix = NotImplemented
    metric = NotImplemented
    metric_names = NotImplemented
    split_map = None
    labels_list = None
    split_to_data_split: Mapping[str, str] = \
        {"train": "train", "validation": "validation", "test": "test"}
    split_valid_to_make_test = True
    split_train_to_make_test = False
    keep_fields_after_preprocess = ["label"]  # The fields that should be kept even after preprocessing

    def __init__(self, config, data_args, seed=42, default_max_length=1):
        self.config = config
        self.seed = seed
        self.data_args = data_args

        self.default_max_length = default_max_length

    def check_n_obs(self, n_obs, total_size):
        if n_obs is not None and n_obs > total_size:
            n_obs = total_size
            logger.warning("n_obs is set to %s", n_obs)
        return n_obs

    def shuffled_indices(self, dataset):
        num_samples = len(dataset)
        generator = torch.Generator()
        generator.manual_seed(self.seed)
        return torch.randperm(num_samples, generator=generator).tolist()

    def subsample(self, dataset, n_obs=None, indices=None):
        """
        Given a dataset returns the subsampled dataset.
        :param n_obs: the number of samples of the subsampled dataset.
        :param indices: indices to select the samples from; if not given, indices are computed
            by shuffling the given dataset.
        :return: subsampled dataset.
        """
        num_samples = len(dataset)
        n_obs = self.check_n_obs(n_obs, num_samples)
        if indices is None:
            indices = self.shuffled_indices(dataset)
        indices = indices[:n_obs]
        return dataset.select(indices)

    def load_dataset(self, split: int):
        return datasets.load_dataset(self.name, self.config, split=split, script_version="master")

    def get_split_indices(self, split, dataset, validation_size):
        indices = self.shuffled_indices(dataset)
        if split == "validation":
            return indices[:validation_size]
        else:
            return indices[validation_size:]

    def preprocessor(self, example):
        return example

    def get(self, split, n_obs=None, split_validation_test=False):
        # For small datasets (n_samples < 10K) without a test set, we divide the validation set in
        # half, using one half as the test set and the other half as the validation set.
        if split in ["eval", "dev", "valid"]:
            split = "validation"
        if split_validation_test and self.split_valid_to_make_test \
                and split != "train":
            mapped_split = self.split_to_data_split["validation"]
            dataset = self.load_dataset(split=mapped_split)
            indices = self.get_split_indices(split, dataset, validation_size=len(dataset)//2)
            dataset = self.subsample(dataset, n_obs, indices)
        # For larger datasets (n_samples > 10K), we split 1K off the training set as
        # validation and use the rest as the training set, keeping the original validation
        # set as the test set.
        elif split_validation_test and self.split_train_to_make_test \
                and split != "test":
            dataset = self.load_dataset(split="train")
            indices = self.get_split_indices(split, dataset, validation_size=1000)
            dataset = self.subsample(dataset, n_obs, indices)
        else:
            mapped_split = self.split_to_data_split[split]
            dataset = self.load_dataset(split=mapped_split)
            # shuffles the data and samples it.
            if n_obs is not None:
                dataset = self.subsample(dataset, n_obs)
        return dataset.map(self.preprocessor)
@ -4,7 +4,7 @@ import abc
import functools
from selectors import EpollSelector
from typing import Callable, List, Mapping
from examples_prompt.trainers.trainer_utils import pad_punctuation
from .utils import pad_punctuation
from examples_prompt.metrics import metrics
from .utils import round_stsb_target
import datasets
@ -30,281 +30,8 @@ from collections import defaultdict
|
|||
from openprompt.utils import round_list
|
||||
import warnings
|
||||
|
||||
# class MLMTokenizerWrapper:
|
||||
# def __init__(self, max_seq_length, tokenizer, truncate_method, mask_token_func=lambda i: "<mask>"):
|
||||
# self.max_seq_length=max_seq_length
|
||||
# self.tokenizer=tokenizer
|
||||
# self.num_special_tokens_to_add = len(tokenizer("")['input_ids'])
|
||||
# # from IPython import embed; embed(header="Truega")
|
||||
# self.truncate_method=truncate_method
|
||||
# self.total_passed_sentences = 0
|
||||
# self.num_truncated_sentences = 0
|
||||
# self.mask_token_func = mask_token_func
|
||||
|
||||
# if truncate_method=='tail':
|
||||
# self.truncate_fct = self.truncate_from_tail
|
||||
# elif truncate_method=='head':
|
||||
# self.truncate_fct = self.truncate_from_head
|
||||
# elif truncate_method == 'balanced':
|
||||
# self.truncate_fct = self.balanced_truncate
|
||||
# else:
|
||||
# raise NotImplementedError
|
||||
|
||||
|
||||
# def merge_wrapped_example(self, wrapped_example,):
|
||||
# ''' # TODO doens't consider the situation that input has two parts
|
||||
# '''
|
||||
|
||||
# wrapped_example
|
||||
|
||||
# # for some dataset like SuperGLUE.COPA, the answer requires prediction an span of
|
||||
# # the input. Or in generation tasks, we need to generate a piece of target_text.
|
||||
# # In these case, it tokenized to the encoded_tgt_text for furture use.
|
||||
|
||||
|
||||
|
||||
# encoder_inputs = defaultdict(list)
|
||||
# # from IPython import embed; embed(header="Line 67")
|
||||
|
||||
# mask_count = 0
|
||||
# for piece in wrapped_example:
|
||||
# if piece['text'] == "<mask>":
|
||||
# encode_text = self.tokenizer.encode(self.mask_token_func(mask_count), add_special_tokens=False, return_special_tokens_mask=True )
|
||||
# mask_count += 1
|
||||
# else:
|
||||
# encode_text = self.tokenizer.encode(piece['text'], add_special_tokens=False, return_special_tokens_mask=True )
|
||||
# encoder_inputs['input_ids'].append(encode_text)
|
||||
# encoder_inputs['shortenable_ids'].append([piece['shortenable_ids']] * len(encode_text))
|
||||
|
||||
|
||||
# encoder_inputs = self.truncate(encoder_inputs=encoder_inputs)
|
||||
# encoder_inputs.pop("shortenable_ids")
|
||||
# encoder_inputs = self.concate_parts(input_dict=encoder_inputs)
|
||||
# decoded_inputs = self.tokenizer.decode(encoder_inputs['input_ids'], clean_up_tokenization_spaces=False)
|
||||
|
||||
# return decoded_inputs
|
||||
|
||||
|
||||
# @staticmethod
|
||||
# def balanced_truncate(input_dict: Dict,
|
||||
# num_tokens_to_truncate: int=0) -> Dict:
|
||||
# '''truncate the inputs with balance, number of cut tokens is proportional to the part's length.
|
||||
# '''
|
||||
# shortenable_lens = [len(parts) if parts[0]==1 else 0
|
||||
# for parts in input_dict['shortenable_ids']]
|
||||
# total_shortenable_len = sum(shortenable_lens)
|
||||
# num_tokens_to_truncate_each_part = [part_len/total_shortenable_len*num_tokens_to_truncate
|
||||
# for part_len in shortenable_lens]
|
||||
# round_list(num_tokens_to_truncate_each_part, num_tokens_to_truncate)
|
||||
|
||||
# truncated_example = defaultdict(list)
|
||||
# for key in input_dict:
|
||||
# parts = input_dict[key]
|
||||
# for num_tokens_to_truncate_part, part in zip(num_tokens_to_truncate_each_part, parts):
|
||||
# truncated_example[key].append(part[:len(part)-num_tokens_to_truncate_part])
|
||||
# return truncated_example
|
||||
|
||||
# @staticmethod
|
||||
# def truncate_from_tail(input_dict: Dict,
|
||||
# num_tokens_to_truncate: int=0) -> Dict:
|
||||
# r"""truncate the inputs from the rear
|
||||
# """
|
||||
# truncated_example = defaultdict(list)
|
||||
# shortenable_ids = input_dict['shortenable_ids']
|
||||
|
||||
# for key in input_dict:
|
||||
# parts = input_dict[key]
|
||||
# to_trunc = num_tokens_to_truncate
|
||||
# for i, part in enumerate(parts[::-1]):
|
||||
# if len(part) == 0: # to prevent some part are empty after tokenization
|
||||
# continue
|
||||
# if shortenable_ids[-1-i][0]==0: # ==0 means the part is not shortenable
|
||||
# continue
|
||||
# parts[-1-i] = part[:-to_trunc] if to_trunc<len(part) else []
|
||||
# to_trunc -= len(part)
|
||||
# if to_trunc <= 0:
|
||||
# break
|
||||
# truncated_example[key] = parts
|
||||
# return truncated_example
|
||||
|
||||
# @staticmethod
|
||||
# def truncate_from_head(input_dict: Dict,
|
||||
# num_tokens_to_truncate: int=0) -> Dict:
|
||||
# r"""truncate the inputs from the head
|
||||
# """
|
||||
# truncated_example = defaultdict(list)
|
||||
# shortenable_ids = input_dict['shortenable_ids']
|
||||
# for key in input_dict:
|
||||
# parts = input_dict[key]
|
||||
# to_trunc = num_tokens_to_truncate
|
||||
# for i, part in enumerate(parts):
|
||||
# if shortenable_ids[i][0]==0: # ==0 means the part is not shortenable
|
||||
# continue
|
||||
# parts[i] = part[:-to_trunc] if to_trunc<len(part) else []
|
||||
# to_trunc -= len(part)
|
||||
# if to_trunc <= 0:
|
||||
# break
|
||||
# truncated_example[key] = parts
|
||||
# return truncated_example
|
||||
|
||||
# @staticmethod
|
||||
# def concate_parts(input_dict: Dict) -> Dict:
|
||||
# for key in input_dict:
|
||||
# input_dict[key] = list(itertools.chain(*input_dict[key]))
|
||||
# return input_dict
|
||||
|
||||
|
||||
# def truncate(self, encoder_inputs):
|
||||
# total_tokens = sum([len(part) for part in encoder_inputs['input_ids']])
|
||||
# num_specials = self.num_special_tokens_to_add
|
||||
# # print("num_specials", num_specials)
|
||||
# num_tokens_to_truncate = total_tokens - self.max_seq_length + num_specials
|
||||
# self.total_passed_sentences+=1
|
||||
# if num_tokens_to_truncate>0:
|
||||
# self.num_truncated_sentences += 1
|
||||
# if num_tokens_to_truncate > sum([len(x) for x in encoder_inputs['shortenable_ids']]):
|
||||
# raise RuntimeError("num_tokens_to_truncate larger than number of shortenable tokens.")
|
||||
# encoder_inputs = self.truncate_fct(input_dict=encoder_inputs,
|
||||
# num_tokens_to_truncate=num_tokens_to_truncate)
|
||||
# return encoder_inputs
|
||||
|
||||
# def tokenizer_preprocessor(self, example):
|
||||
# # source, target = example
|
||||
# # from IPython import embed; embed(header="Trehre2")
|
||||
# label = example['label']
|
||||
# guid = example['idx']
|
||||
# meta = dict(example)
|
||||
# meta.pop("label")
|
||||
# meta.pop("idx")
|
||||
|
||||
|
||||
|
||||
# # from IPython import embed; embed(header="Trehre2")
|
||||
|
||||
# e = InputExample(**{"meta": meta, 'label': label, 'guid': guid})
|
||||
|
||||
# if self.predict_with_generate:
|
||||
# e = self.verbalizer.wrap_one_example(e)
|
||||
# example_wrapped = self.template.wrap_one_example(e)
|
||||
# encoded_sentence = self.tokenizer_wrapper.merge_wrapped_example(example_wrapped)
|
||||
# print(encoded_sentence)
|
||||
# if self.predict_with_generate:
|
||||
# # return {"source": encoded_sentence, 'target': ', 'extra_fields':[]}
|
||||
# return {"source": encoded_sentence, "label": label, 'target': '', 'extra_fields':{'dataset_name':self.name}}
|
||||
# else:
|
||||
# return {"source": encoded_sentence, "label": label, 'target': e.target_text, 'extra_fields':{'dataset_name':self.name}}
class AbstractTask(abc.ABC):
|
||||
name = NotImplemented
|
||||
config = NotImplemented
|
||||
prefix = NotImplemented
|
||||
metric = NotImplemented
|
||||
metric_names = NotImplemented
|
||||
split_map = None
|
||||
labels_list = None
|
||||
split_to_data_split: Mapping[str, str] = \
|
||||
{"train": "train", "validation": "validation", "test": "test"}
|
||||
small_datasets_without_all_splits = ["cola", "wnli", "rte", "superglue-cb", "superglue-copa", "superglue-multirc",
|
||||
"superglue-wic", "superglue-wsc.fixed", "superglue-rte", "mrpc", "stsb",
|
||||
"superglue-boolq", "qqp", "qnli", "superglue-record", "sst2"]
|
||||
large_data_without_all_splits = [] #["qqp", "qnli", "superglue-record", "sst2"]
|
||||
|
||||
def __init__(self, config, data_args, seed=42, default_max_length=1):
|
||||
self.config = config
|
||||
self.seed = seed
|
||||
self.data_args = data_args
|
||||
# self.tokenizer = tokenizer
|
||||
# self.predict_with_generate = predict_with_generate
|
||||
self.default_max_length = default_max_length
|
||||
|
||||
# generation_paradigm = getattr(config, "generation_paradigm", True)
|
||||
# self.prompt = PromptCollections[self.name](tid, vid, generation_paradigm)
|
||||
|
||||
|
||||
# def get_max_target_length(self, default_max_length):
|
||||
# if self.predict_with_generate:
|
||||
# return -1
|
||||
# else:
|
||||
# return default_max_length
|
||||
|
||||
def check_n_obs(self, n_obs, total_size):
|
||||
if n_obs is not None and n_obs > total_size:
|
||||
n_obs = total_size
|
||||
logger.warning("n_obs is set to %s", n_obs)
|
||||
return n_obs
|
||||
|
||||
def shuffled_indices(self, dataset):
|
||||
num_samples = len(dataset)
|
||||
generator = torch.Generator()
|
||||
generator.manual_seed(self.seed)
|
||||
return torch.randperm(num_samples, generator=generator).tolist()
|
||||
|
||||
def subsample(self, dataset, n_obs=None, indices=None):
|
||||
"""
|
||||
Given a dataset returns the subsampled dataset.
|
||||
:param n_obs: the number of samples of the subsampled dataset.
|
||||
:param indices: indices to select the samples from; if not given, indices are
   computed by shuffling the given dataset.
|
||||
:return: subsampled dataset.
|
||||
"""
|
||||
num_samples = len(dataset)
|
||||
n_obs = self.check_n_obs(n_obs, num_samples)
|
||||
if indices is None:
|
||||
indices = self.shuffled_indices(dataset)
|
||||
indices = indices[:n_obs]
|
||||
return dataset.select(indices)
|
||||
|
||||
def load_dataset(self, split: str):
|
||||
return datasets.load_dataset(self.name, self.config, split=split, script_version="master")
|
||||
|
||||
def get_split_indices(self, split, dataset, validation_size):
|
||||
indices = self.shuffled_indices(dataset)
|
||||
if split == "validation":
|
||||
return indices[:validation_size]
|
||||
else:
|
||||
return indices[validation_size:]
|
||||
|
||||
def preprocessor(self, example):
|
||||
return example
|
||||
|
||||
def get(self, split, n_obs=None, split_validation_test=False):
|
||||
# For small datasets (n_samples < 10K) without test set, we divide validation set to
|
||||
# half, use one half as test set and one half as validation set.
|
||||
if split in ["eval", "dev", "valid"]:
|
||||
split = "validation"
|
||||
if split_validation_test and self.name in self.small_datasets_without_all_splits \
|
||||
and split != "train":
|
||||
mapped_split = self.split_to_data_split["validation"]
|
||||
dataset = self.load_dataset(split=mapped_split)
|
||||
indices = self.get_split_indices(split, dataset, validation_size=len(dataset)//2)
|
||||
dataset = self.subsample(dataset, n_obs, indices)
|
||||
# For larger datasets (n_samples > 10K), we divide training set into 1K as
|
||||
# validation and the rest as training set, keeping the original validation
|
||||
# set as the test set.
|
||||
elif split_validation_test and self.name in self.large_data_without_all_splits \
|
||||
and split != "test":
|
||||
dataset = self.load_dataset(split="train")
|
||||
indices = self.get_split_indices(split, dataset, validation_size=1000)
|
||||
dataset = self.subsample(dataset, n_obs, indices)
|
||||
else:
|
||||
mapped_split = self.split_to_data_split[split]
|
||||
dataset = self.load_dataset(split=mapped_split)
|
||||
# shuffles the data and samples it.
|
||||
if n_obs is not None:
|
||||
dataset = self.subsample(dataset, n_obs)
|
||||
return dataset.map(self.preprocessor)
|
||||
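# A minimal sketch (not part of the diff) of the split_validation_test logic implemented in
# AbstractTask.get above: for the small tasks without a test split, the original validation
# split is shuffled with a seeded torch.Generator and cut in half, one half serving as the new
# validation set and the other as the held-out test set. The toy size below is illustrative.
import torch

def split_validation_indices(num_validation_examples, seed=42):
    generator = torch.Generator()
    generator.manual_seed(seed)
    indices = torch.randperm(num_validation_examples, generator=generator).tolist()
    validation_size = num_validation_examples // 2
    # indices[:validation_size] -> new validation set, the rest -> held-out test set
    return indices[:validation_size], indices[validation_size:]

new_validation, new_test = split_validation_indices(10)
print(new_validation, new_test)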
from .processor import AbstractTask
|
||||
|
||||
class Squad(AbstractTask):
|
||||
name = "squad"
|
||||
|
@@ -735,118 +462,95 @@ class SuperGLUEWIC(AbstractTask):
|
|||
return datasets.load_dataset('super_glue', 'wic', split=split, script_version="master")
|
||||
|
||||
|
||||
|
||||
# class SuperGLUEWSCFixed(AbstractTask):
|
||||
# # source: https://github.com/google-research/text-to-text-transfer-transformer/blob/master/t5/data/preprocessors.py
|
||||
# """Convert WSC examples to text2text format.
|
||||
# WSC includes a sentence along with 2 'spans': the first denoting a noun and
|
||||
# the other a pronoun. The 'label' specifies whether or not the pronoun is
|
||||
# referencing the noun. This preprocessor puts ' * ' around the noun and ' # '
|
||||
# around the pronoun.
|
||||
# For example, a typical example from WSC might look like
|
||||
# {
|
||||
# 'text': 'This is a test sentence .',
|
||||
# 'span1_text': 'test',
|
||||
# 'span1_index': 3,
|
||||
# 'span2_text': 'This',
|
||||
# 'span2_index': 0,
|
||||
# 'label': 0
|
||||
# }
|
||||
# This example would be transformed to
|
||||
# {
|
||||
# 'inputs': 'wsc text: # This # is a * test * sentence .',
|
||||
# 'targets': 'False'
|
||||
# }
|
||||
# class SuperGLUERecord(AbstractTask):
|
||||
# """Convert ReCoRD examples to text2text examples.
|
||||
# ReCoRD contains a passage, query containing a '@placeholder' string, and a set
|
||||
# of entities that are the possible values of the placeholder. Each train and
|
||||
# validation example will have a list of answers, any of which would be
|
||||
# considered correct.
|
||||
# For example, a typical example from ReCoRD might look like
|
||||
# {
|
||||
# 'passage': 'This is the passage.',
|
||||
# 'query': 'A @placeholder is a bird.',
|
||||
# 'entities': ['penguin', 'potato', 'pigeon'],
|
||||
# 'answers': ['penguin', 'pigeon'],
|
||||
# }
|
||||
# which this preprocessor would turn into the following two examples:
|
||||
# {
|
||||
# 'inputs': 'record query: A @placeholder is a bird. entities: penguin, '
|
||||
# 'potato, pigeon passage: This is the passage.',
|
||||
# 'targets': 'penguin',
|
||||
# }
|
||||
# and
|
||||
# {
|
||||
# 'inputs': 'record query: A @placeholder is a bird. entities: penguin, '
|
||||
# 'potato, pigeon passage: This is the passage.',
|
||||
# 'targets': 'pigeon',
|
||||
# }
|
||||
# """
|
||||
# name = "superglue-wsc.fixed"
|
||||
# labels_list = ['0', '1']
|
||||
# name = "superglue-record"
|
||||
# split_to_data_split = {"train": "train",
|
||||
# "validation": "validation",
|
||||
# "test": "validation"}
|
||||
# metric = [metrics.accuracy]
|
||||
# metric_names = ["accuracy"]
|
||||
# metric = [metrics.squad]
|
||||
# metric_names = ["squad"]
|
||||
|
||||
# def load_dataset(self, split):
|
||||
# return datasets.load_dataset('super_glue', 'wsc.fixed', split=split, script_version="master")
|
||||
# return datasets.load_dataset('super_glue', 'record', split=split, script_version="master")
|
||||
|
||||
# def _mark_span(self, text, span_str, span_idx, mark):
|
||||
# pattern_tmpl = r'^((?:\S+\s){N})(W)'
|
||||
# pattern = re.sub('N', str(span_idx), pattern_tmpl)
|
||||
# pattern = re.sub('W', span_str, pattern)
|
||||
# return re.sub(pattern, r'\1{0} \2 {0}'.format(mark), text)
|
||||
# def preprocessor(self, batch, add_prefix=True):
|
||||
# new_batch = collections.defaultdict(list)
|
||||
# keys = batch.keys()
|
||||
# for values in zip(*batch.values()):
|
||||
# ex = {k: v for k, v in zip(keys, values)}
|
||||
# # updates the passage.
|
||||
# passage = ex['passage']
|
||||
# passage = re.sub(r'(\.|\?|\!|\"|\')\n@highlight\n', r'\1 ', passage)
|
||||
# passage = re.sub(r'\n@highlight\n', '. ', passage)
|
||||
# inputs = f"record query: {ex['query']} entities: {', '.join(ex['entities'])} passage: {passage}"
|
||||
# if add_prefix:
|
||||
# inputs = self.name + " " + inputs
|
||||
# # duplicates the samples based on number of answers.
|
||||
# num_answers = len(ex["answers"])
|
||||
# num_duplicates = np.maximum(1, num_answers)
|
||||
# new_batch["source"].extend([inputs] * num_duplicates)
|
||||
# new_batch["target"].extend(ex["answers"] if num_answers > 0 else ["<unk>"])
|
||||
# new_batch["task"].extend([self.name] * num_duplicates)
|
||||
# new_batch["extra_fields"].extend([{"answers": ex["answers"]}]*num_duplicates)
|
||||
# return new_batch
|
||||
|
||||
# def preprocessor(self, example, add_prefix=True):
|
||||
# # converts text as done in T5.
|
||||
# text = example['text']
|
||||
# text = self._mark_span(text, example['span1_text'], example['span1_index'], '*')
|
||||
# # Compensate for 2 added "words" added in previous step.
|
||||
# span2_index = example['span2_index'] + 2 * int(example['span1_index'] < example['span2_index'])
|
||||
# text = self._mark_span(text, example['span2_text'], span2_index, '#')
|
||||
# src_texts = ["text:", text]
|
||||
# tgt_texts = [str(example["label"])]
|
||||
# return self.fseq2seq_format(src_texts, tgt_texts, add_prefix)
|
||||
# def map_dataset(self, dataset, add_prefix=True):
|
||||
# return dataset.map(functools.partial(self.preprocessor, add_prefix=add_prefix),
|
||||
# batched=True, remove_columns=dataset.column_names)
|
||||
|
||||
|
||||
class SuperGLUERecord(AbstractTask):
|
||||
"""Convert ReCoRD examples to text2text examples.
|
||||
ReCoRD contains a passage, query containing a '@placeholder' string, and a set
|
||||
of entities that are the possible values of the placeholder. Each train and
|
||||
validation example will have a list of answers, any of which would be
|
||||
considered correct.
|
||||
For example, a typical example from ReCoRD might look like
|
||||
{
|
||||
'passage': 'This is the passage.',
|
||||
'query': 'A @placeholder is a bird.',
|
||||
'entities': ['penguin', 'potato', 'pigeon'],
|
||||
'answers': ['penguin', 'pigeon'],
|
||||
}
|
||||
which this preprocessor would turn into the following two examples:
|
||||
{
|
||||
'inputs': 'record query: A @placeholder is a bird. entities: penguin, '
|
||||
'potato, pigeon passage: This is the passage.',
|
||||
'targets': 'penguin',
|
||||
}
|
||||
and
|
||||
{
|
||||
'inputs': 'record query: A @placeholder is a bird. entities: penguin, '
|
||||
'potato, pigeon passage: This is the passage.',
|
||||
'targets': 'pigeon',
|
||||
}
|
||||
"""
|
||||
name = "superglue-record"
|
||||
class Beans(AbstractTask):
|
||||
name = "beans"
|
||||
labels_list = ['angular_leaf_spot', 'bean_rust', "healthy"]
|
||||
split_to_data_split = {"train": "train",
|
||||
"validation": "validation",
|
||||
"test": "validation"}
|
||||
metric = [metrics.accuracy]
|
||||
metric_names = ["accuracy"]
|
||||
|
||||
verbalizers = {
|
||||
"0": {
|
||||
"0": "No",
|
||||
"1": "Yes",
|
||||
}
|
||||
}
|
||||
|
||||
templates_text = {
|
||||
"0": """{"meta":"sentence1"}"""
|
||||
}
|
||||
|
||||
def load_dataset(self, split):
|
||||
# from IPython import embed; embed(header="beans")
|
||||
if self.data_args.datasets_load_from_disk:
|
||||
return datasets.load_from_disk(f"{self.data_args.datasets_saved_path}/super_glue.wic")[split]
|
||||
else:
|
||||
return datasets.load_dataset('beans', split=split, script_version="master")
|
||||
|
||||
def preprocessor(self, batch, add_prefix=True):
|
||||
new_batch = collections.defaultdict(list)
|
||||
keys = batch.keys()
|
||||
for values in zip(*batch.values()):
|
||||
ex = {k: v for k, v in zip(keys, values)}
|
||||
# updates the passage.
|
||||
passage = ex['passage']
|
||||
passage = re.sub(r'(\.|\?|\!|\"|\')\n@highlight\n', r'\1 ', passage)
|
||||
passage = re.sub(r'\n@highlight\n', '. ', passage)
|
||||
inputs = f"record query: {ex['query']} entities: {', '.join(ex['entities'])} passage: {passage}"
|
||||
if add_prefix:
|
||||
inputs = self.name + " " + inputs
|
||||
# duplicates the samples based on number of answers.
|
||||
num_answers = len(ex["answers"])
|
||||
num_duplicates = np.maximum(1, num_answers)
|
||||
new_batch["source"].extend([inputs] * num_duplicates)
|
||||
new_batch["target"].extend(ex["answers"] if num_answers > 0 else ["<unk>"])
|
||||
new_batch["task"].extend([self.name] * num_duplicates)
|
||||
new_batch["extra_fields"].extend([{"answers": ex["answers"]}]*num_duplicates)
|
||||
return new_batch
|
||||
|
||||
def map_dataset(self, dataset, add_prefix=True):
|
||||
return dataset.map(functools.partial(self.preprocessor, add_prefix=add_prefix),
|
||||
batched=True, remove_columns=dataset.column_names)
|
||||
|
||||
|
||||
TASK_MAPPING = OrderedDict(
|
||||
|
@@ -866,8 +570,8 @@ TASK_MAPPING = OrderedDict(
|
|||
('superglue-copa', SuperGLUECOPA),
|
||||
('superglue-multirc', SuperGLUEMultiRC),
|
||||
('superglue-wic', SuperGLUEWIC),
|
||||
# ('superglue-wsc.fixed', SuperGLUEWSCFixed),
|
||||
# ('superglue-record', SuperGLUERecord)
|
||||
('beans', Beans)
|
||||
]
|
||||
)
|
||||
|
||||
|
|
|
@@ -1,4 +1,5 @@
|
|||
import numpy as np
|
||||
import re
|
||||
|
||||
def round_stsb_target(label):
|
||||
"""STSB maps two sentences to a floating point number between 1 and 5
|
||||
|
@@ -15,3 +16,15 @@ def round_stsb_target(label):
|
|||
"""
|
||||
return np.round((label * 5) / 5, decimals=1)
|
||||
|
||||
|
||||
def pad_punctuation(text):
|
||||
"""Re-implementation of _pad_punctuation in t5. This function adds spaces
|
||||
around punctuation. While this pads punctuation as expected, it has the
|
||||
unexpected effect of also padding certain accented unicode characters with
spaces. For instance: "François" becomes "Fran ç ois". """
|
||||
# Pad everything except for: underscores (_), whitespace (\s),
|
||||
# numbers (\p{N}), letters (\p{L}) and accent characters (\p{M}).
|
||||
text = re.sub(r'([^_\s\p{N}\p{L}\p{M}])', r' \1 ', text)
|
||||
# Collapse consecutive whitespace into one space.
|
||||
text = re.sub(r'\s+', ' ', text)
|
||||
return text
|
|
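# A hedged usage sketch for pad_punctuation above. Note that the \p{N}/\p{L}/\p{M} classes in the
# pattern are not understood by the standard-library `re` module; running the function as written
# requires the third-party `regex` package (a near drop-in superset of `re`).
import regex

def pad_punctuation_demo(text):
    text = regex.sub(r'([^_\s\p{N}\p{L}\p{M}])', r' \1 ', text)  # space out punctuation
    text = regex.sub(r'\s+', ' ', text)                          # collapse runs of whitespace
    return text

print(pad_punctuation_demo("Hello, world! (test)"))  # -> 'Hello , world ! ( test ) '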
@@ -0,0 +1,44 @@
|
|||
|
||||
PATHBASE=/mnt/sfs_turbo/hsd/officialod/OpenDelta-1/examples/examples_prompt/
|
||||
PYTHONPATH=/mnt/sfs_turbo/zhangshudan/anaconda3/envs/officialod/bin/python
|
||||
PLMPATHBASE=/mnt/sfs_turbo/hsd/plm_cache/ # must be empty string or dir that ends with /
|
||||
DATASETSPATHBASE=/mnt/sfs_turbo/hsd/huggingface_datasets/saved_to_disk/
|
||||
RUNTIME=$(date +%m%d%H%M%S)
|
||||
MODELNAME="roberta-base"
|
||||
DATASET=$1
|
||||
DELTATYPES=("none" "bitfit" "lora" "adapter")
|
||||
CUDAIDS=("0 1" "2 3" "4 5" "6 7")
|
||||
NUMTRIALS=50
|
||||
CONTINUESTUDY=${2:-'0'}
|
||||
|
||||
echo $RUNTIME
|
||||
echo $MODELNAME
|
||||
echo $DATASET
|
||||
echo "${DELTATYPES[@]}"
echo "${CUDAIDS[@]}"
|
||||
echo $NUMTRIALS
|
||||
echo $CONTINUESTUDY
|
||||
cd $PATHBASE
|
||||
|
||||
|
||||
|
||||
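# One background job per delta type: expid indexes DELTATYPES and CUDAIDS in parallel, so e.g.
# expid=1 runs "bitfit" on GPUs "2 3". Each job runs NUMTRIALS trials of the hyperparameter
# search and appends its log to out_sfs/$RUNTIME.txt; `wait` below blocks until all four finish.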
for expid in 0 1 2 3
|
||||
do
|
||||
( $PYTHONPATH search_distributed.py \
|
||||
--model_name $MODELNAME \
|
||||
--dataset $DATASET \
|
||||
--delta_type ${DELTATYPES[$expid]} \
|
||||
--cuda_ids ${CUDAIDS[$expid]} \
|
||||
--num_trials $NUMTRIALS \
|
||||
--mode run \
|
||||
--repeat_time 1 \
|
||||
--main_file_name run_mlm.py \
|
||||
--pathbase $PATHBASE \
|
||||
--pythonpath $PYTHONPATH \
|
||||
--plm_path_base $PLMPATHBASE \
|
||||
--datasets_saved_path $DATASETSPATHBASE \
|
||||
--datasets_load_from_disk \
|
||||
--continue_study $CONTINUESTUDY >>/mnt/sfs_turbo/hsd/officialod/OpenDelta-1/examples/examples_prompt/out_sfs/$RUNTIME.txt 2>&1
|
||||
) &
|
||||
done
|
||||
wait
|
|
@@ -19,9 +19,9 @@ Fine-tuning the library models for sequence to sequence.
|
|||
import functools
|
||||
import logging
|
||||
from opendelta.utils.delta_center import create_hub_repo_name
|
||||
import torch
|
||||
import os
|
||||
os.environ['MKL_THREADING_LAYER'] = 'GNU'
|
||||
os.environ['MKL_SERVICE_FORCE_INTEL'] = '1'
|
||||
import sys
|
||||
import subprocess
|
||||
|
@@ -43,11 +43,11 @@ from transformers.trainer_utils import is_main_process, get_last_checkpoint
|
|||
from examples_prompt.data_processors import AutoTask, TaskDataCollatorForSeq2Seq, AutoPostProcessor
|
||||
from examples_prompt.seq2seq_trainer import Seq2SeqTrainer
|
||||
# from training_args import AdapterTrainingArguments
|
||||
from examples_prompt.trainers.trainer_utils import save_training_config
|
||||
from dataclasses import dataclass, field
|
||||
|
||||
from transformers.models.t5.modeling_t5 import T5Config, T5ForConditionalGeneration
|
||||
from examples_prompt.trainers.model_args import ModelArguments
|
||||
from examples_prompt.utils.args import ModelArguments
|
||||
from examples_prompt.trainers.trainer_args import TrainingArguments, DataTrainingArguments
|
||||
from transformers.trainer import Trainer
|
||||
from examples_prompt.metrics.metrics import transform_for_generation
|
||||
|
@@ -96,14 +96,14 @@ class RemainArgHfArgumentParser(HfArgumentParser):
|
|||
inputs = {k: data.pop(k) for k in list(data.keys()) if k in keys}
|
||||
obj = dtype(**inputs)
|
||||
outputs.append(obj)
|
||||
|
||||
|
||||
remain_args = argparse.ArgumentParser()
|
||||
remain_args.__dict__.update(data)
|
||||
if return_remaining_args:
|
||||
return (*outputs, remain_args)
|
||||
else:
|
||||
return (*outputs,)
|
||||
|
||||
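# A small usage sketch (toy dataclass and config values, not from the diff) of the parser above,
# assuming RemainArgHfArgumentParser is in scope: fields that match a dataclass are popped into it,
# and whatever is left over in the JSON comes back as the "remaining" arguments object.
import dataclasses
import json
import tempfile

@dataclasses.dataclass
class ToyArgs:
    model_name_or_path: str = "t5-base"
    learning_rate: float = 5e-4

with tempfile.NamedTemporaryFile("w", suffix=".json", delete=False) as f:
    json.dump({"model_name_or_path": "t5-small", "some_extra_flag": 1}, f)

toy_args, remaining = RemainArgHfArgumentParser((ToyArgs,)).parse_json_file(f.name)
print(toy_args.model_name_or_path)  # t5-small
print(remaining.some_extra_flag)    # 1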
|
||||
|
||||
|
||||
def main():
|
||||
|
@@ -132,7 +132,7 @@ def main():
|
|||
"Use --overwrite_output_dir to overcome."
|
||||
)
|
||||
'''
|
||||
pass
|
||||
elif last_checkpoint is not None:
|
||||
logger.info(
|
||||
f"Checkpoint detected, resuming training at {last_checkpoint}. To avoid this behavior, change "
|
||||
|
@@ -233,7 +233,7 @@ def main():
|
|||
# Temporarily set max_target_length for training.
|
||||
#max_target_length = data_args.max_target_length
|
||||
padding = "max_length" if data_args.pad_to_max_length else False
|
||||
|
||||
|
||||
def preprocess_function(examples, max_target_length):
|
||||
# max_target_length += 1
|
||||
model_inputs = tokenizer([s+"<extra_id_0>" for s in examples['source']], max_length=data_args.max_source_length,
|
||||
|
@@ -281,11 +281,11 @@ def main():
|
|||
)
|
||||
train_dataset = concatenate_datasets(train_datasets)
|
||||
print(f"Train dataset size {len(train_dataset)}")
|
||||
|
||||
|
||||
if training_args.do_eval:
|
||||
eval_datasets = {eval_dataset: AutoTask.get(eval_dataset, eval_dataset_config,
|
||||
seed=data_args.data_seed).get(
|
||||
split="validation",
|
||||
split="validation",
|
||||
split_validation_test=training_args.split_validation_test,
|
||||
add_prefix=True,
|
||||
n_obs=data_args.max_val_samples)
|
||||
|
@@ -305,7 +305,7 @@ def main():
|
|||
if training_args.do_test:
|
||||
test_datasets = {test_dataset: AutoTask.get(test_dataset, test_dataset_config,
|
||||
seed=data_args.data_seed).get(
|
||||
split="test",
|
||||
split="test",
|
||||
split_validation_test=training_args.split_validation_test,
|
||||
add_prefix=True,
|
||||
n_obs=data_args.max_test_samples)
|
||||
|
@@ -340,10 +340,10 @@ def main():
|
|||
|
||||
# Extracts the extra information needed to evaluate on each dataset.
|
||||
# This information is only used in compute_metrics.
|
||||
# We will assume that the test/eval dataloader does not change the order of
|
||||
# the data.
|
||||
data_info = {"eval": eval_datasets[data_args.eval_dataset_name[0]]['extra_fields'],
|
||||
"test": test_datasets[data_args.test_dataset_name[0]]['extra_fields'],
|
||||
"test": test_datasets[data_args.test_dataset_name[0]]['extra_fields'],
|
||||
"train": train_dataset['extra_fields']}
|
||||
def compute_metrics(eval_preds):
|
||||
preds, labels, data_info = eval_preds
|
||||
|
@@ -383,7 +383,7 @@ def main():
|
|||
)
|
||||
|
||||
|
||||
# Saves training config.
|
||||
if trainer.is_world_process_zero():
|
||||
os.makedirs(training_args.output_dir, exist_ok=True)
|
||||
save_training_config(sys.argv[1], training_args.output_dir)
|
||||
|
@@ -401,15 +401,15 @@ def main():
|
|||
start = torch.cuda.Event(enable_timing=True)
|
||||
end = torch.cuda.Event(enable_timing=True)
|
||||
start.record()
|
||||
|
||||
|
||||
train_result = trainer.train(resume_from_checkpoint=checkpoint)
|
||||
|
||||
|
||||
if training_args.compute_time:
|
||||
end.record()
|
||||
torch.cuda.synchronize() # wait for all_reduce to complete
|
||||
total_time = start.elapsed_time(end)/(1000*60)
|
||||
performance_metrics.update({"total_time in minutes ": total_time})
|
||||
|
||||
|
||||
trainer.save_model() # Saves the tokenizer too for easy upload
|
||||
train_metrics = train_result.metrics
|
||||
max_train_samples = (
|
||||
|
@@ -431,7 +431,7 @@ def main():
|
|||
if training_args.compute_memory or training_args.compute_time:
|
||||
print(performance_metrics)
|
||||
trainer.save_metrics("performance", performance_metrics)
|
||||
|
||||
|
||||
# Evaluation
|
||||
results = {}
|
||||
if training_args.do_eval:
|
||||
|
@@ -455,9 +455,9 @@ def main():
|
|||
trainer.log_metrics("test", metrics)
|
||||
trainer.save_metrics("test", metrics)
|
||||
results['test'] = metrics
|
||||
|
||||
|
||||
repo_name = create_hub_repo_name(root="DeltaHub",
|
||||
dataset=data_args.task_name,
|
||||
delta_type = delta_args.delta_type,
|
||||
model_name_or_path= model_args.model_name_or_path)
|
||||
results['repo_name'] = repo_name
|
||||
|
|
|
@@ -1,790 +0,0 @@
|
|||
# coding=utf-8
|
||||
# Copyright The HuggingFace Team and The HuggingFace Inc. team. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
"""
|
||||
Fine-tuning the library models for sequence to sequence.
|
||||
"""
|
||||
# You can also adapt this script on your own sequence to sequence task. Pointers for this are left as comments.
|
||||
import functools
|
||||
import logging
|
||||
# from opendelta.utils.delta_center import create_hub_repo_name
|
||||
import torch
|
||||
import os
|
||||
os.environ['MKL_THREADING_LAYER'] = 'GNU'
|
||||
os.environ['MKL_SERVICE_FORCE_INTEL'] = '1'
|
||||
import sys
|
||||
import subprocess
|
||||
from typing import Optional, List
|
||||
|
||||
from datasets import load_dataset, load_metric, concatenate_datasets
|
||||
import transformers
|
||||
from transformers import (
|
||||
AutoConfig,
|
||||
AutoModelForMaskedLM,
|
||||
AutoModelForSeq2SeqLM,
|
||||
AutoTokenizer,
|
||||
DataCollatorForSeq2Seq,
|
||||
HfArgumentParser,
|
||||
MBartTokenizer,
|
||||
default_data_collator,
|
||||
set_seed,
|
||||
)
|
||||
from transformers.trainer_utils import is_main_process, get_last_checkpoint
|
||||
# from ..seq2seq.utils import get_adapter_config
|
||||
from examples_prompt.data_processors import AutoTask #, #TaskDataCollatorForSeq2Seq, AutoPostProcessor, data_collator
|
||||
from transformers import Seq2SeqTrainer
|
||||
# from training_args import AdapterTrainingArguments
|
||||
from examples_prompt.trainers.trainer_utils import save_training_config
|
||||
from dataclasses import dataclass, field
|
||||
|
||||
from transformers.models.t5.modeling_t5 import T5Config, T5ForConditionalGeneration
|
||||
from examples_prompt.trainers.model_args import ModelArguments
|
||||
from examples_prompt.trainers.trainer_args import TrainingArguments, DataTrainingArguments
|
||||
from transformers.trainer import Trainer
|
||||
from examples_prompt.metrics.metrics import transform_for_generation
|
||||
import json
|
||||
import numpy as np
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
import os
|
||||
os.environ["TOKENIZERS_PARALLELISM"] = "false"
|
||||
|
||||
TASK_TO_METRICS = {"mrpc": ["accuracy", "f1"],
|
||||
"cola": ['matthews_correlation'],
|
||||
"stsb": ['pearson', 'spearmanr'],
|
||||
'sst2': ['accuracy'],
|
||||
"mnli": ["accuracy"],
|
||||
"mnli_mismatched": ["accuracy"],
|
||||
"mnli_matched": ["accuracy"],
|
||||
"qnli": ["accuracy"],
|
||||
"rte": ["accuracy"],
|
||||
"wnli": ["accuracy"],
|
||||
"qqp": ["accuracy", "f1"],
|
||||
"superglue-boolq": ["accuracy"],
|
||||
"superglue-rte": ["accuracy"],
|
||||
"superglue-cb": ["f1_multiclass", "accuracy"],
|
||||
"superglue-copa": ["accuracy"],
|
||||
"superglue-multirc": ["f1", "em"],
|
||||
"superglue-wic": ["accuracy"],
|
||||
"superglue-wsc.fixed": ["accuracy"],
|
||||
"superglue-record": ["f1", "em"]
|
||||
}
|
||||
|
||||
|
||||
class RemainArgHfArgumentParser(HfArgumentParser):
|
||||
def parse_json_file(self, json_file: str, return_remaining_args=True ):
|
||||
"""
|
||||
Alternative helper method that does not use `argparse` at all, instead loading a json file and populating the
|
||||
dataclass types.
|
||||
"""
|
||||
import argparse
|
||||
import json
|
||||
from pathlib import Path
|
||||
import dataclasses
|
||||
|
||||
data = json.loads(Path(json_file).read_text())
|
||||
outputs = []
|
||||
for dtype in self.dataclass_types:
|
||||
keys = {f.name for f in dataclasses.fields(dtype) if f.init}
|
||||
inputs = {k: data.pop(k) for k in list(data.keys()) if k in keys}
|
||||
obj = dtype(**inputs)
|
||||
outputs.append(obj)
|
||||
|
||||
remain_args = argparse.ArgumentParser()
|
||||
remain_args.__dict__.update(data)
|
||||
if return_remaining_args:
|
||||
return (*outputs, remain_args)
|
||||
else:
|
||||
return (*outputs,)
|
||||
|
||||
|
||||
|
||||
def main():
|
||||
# See all possible arguments in src/transformers/training_args.py
|
||||
# or by passing the --help flag to this script.
|
||||
# We now keep distinct sets of args, for a cleaner separation of concerns.
|
||||
parser = RemainArgHfArgumentParser((ModelArguments, DataTrainingArguments, TrainingArguments))
|
||||
if len(sys.argv) == 2 and sys.argv[1].endswith(".json"):
|
||||
# If we pass only one argument to the script and it's the path to a json file,
|
||||
# let's parse it to get our arguments.
|
||||
model_args, data_args, training_args, delta_args = parser.parse_json_file(json_file=os.path.abspath(sys.argv[1]))
|
||||
else:
|
||||
model_args, data_args, training_args, delta_args = parser.parse_args_into_dataclasses(return_remaining_strings=True)
|
||||
|
||||
|
||||
print(f"{training_args.output_dir}/results.json")
|
||||
# exit()
|
||||
# Detecting last checkpoint.
|
||||
last_checkpoint = None
|
||||
if os.path.isdir(training_args.output_dir) and training_args.do_train and not training_args.overwrite_output_dir:
|
||||
last_checkpoint = get_last_checkpoint(training_args.output_dir)
|
||||
print("#### last_checkpoint ", last_checkpoint)
|
||||
if last_checkpoint is None and len(os.listdir(training_args.output_dir)) > 0:
|
||||
'''
|
||||
raise ValueError(
|
||||
f"Output directory ({training_args.output_dir}) already exists and is not empty. "
|
||||
"Use --overwrite_output_dir to overcome."
|
||||
)
|
||||
'''
|
||||
pass
|
||||
elif last_checkpoint is not None:
|
||||
logger.info(
|
||||
f"Checkpoint detected, resuming training at {last_checkpoint}. To avoid this behavior, change "
|
||||
"the `--output_dir` or add `--overwrite_output_dir` to train from scratch."
|
||||
)
|
||||
|
||||
# Setup logging
|
||||
logging.basicConfig(
|
||||
format="%(asctime)s - %(levelname)s - %(name)s - %(message)s",
|
||||
datefmt="%m/%d/%Y %H:%M:%S",
|
||||
handlers=[logging.StreamHandler(sys.stdout)],
|
||||
)
|
||||
logger.setLevel(logging.INFO if is_main_process(training_args.local_rank) else logging.WARN)
|
||||
|
||||
# Log on each process the small summary:
|
||||
logger.warning(
|
||||
f"Process rank: {training_args.local_rank}, device: {training_args.device}, n_gpu: {training_args.n_gpu}"
|
||||
+ f"distributed training: {bool(training_args.local_rank != -1)}, 16-bits training: {training_args.fp16}"
|
||||
)
|
||||
# Set the verbosity to info of the Transformers logger (on main process only):
|
||||
if is_main_process(training_args.local_rank):
|
||||
transformers.utils.logging.set_verbosity_info()
|
||||
# logger.info("Training/evaluation parameters %s", training_args, model_args, data_args, delta_args)
|
||||
logger.info("{}\n{}\n{}\n{}".format(training_args, model_args, data_args, delta_args))
|
||||
|
||||
|
||||
# Set seed before initializing model.
|
||||
set_seed(training_args.seed)
|
||||
|
||||
# Get the datasets: you can either provide your own CSV/JSON training and evaluation files (see below)
|
||||
# or just provide the name of one of the public datasets available on the hub at https://huggingface.co/datasets/
|
||||
# (the dataset will be downloaded automatically from the datasets Hub).
|
||||
#
|
||||
# For CSV/JSON files in the summarization task, this script will use the first column for the full texts and the
|
||||
# second column for the summaries (unless you specify column names for this with the `text_column` and
|
||||
# `summary_column` arguments).
|
||||
# For translation, only JSON files are supported, with one field named "translation" containing two keys for the
|
||||
# source and target languages (unless you adapt what follows).
|
||||
#
|
||||
# In distributed training, the load_dataset function guarantee that only one local process can concurrently
|
||||
# download the dataset.
|
||||
# See more about loading any type of standard or custom dataset (from files, python dict, pandas DataFrame, etc) at
|
||||
# https://huggingface.co/docs/datasets/loading_datasets.html.
|
||||
|
||||
# Load pretrained model and tokenizer
|
||||
#
|
||||
# Distributed training:
|
||||
# The .from_pretrained methods guarantee that only one local process can concurrently
|
||||
# download model & vocab.
|
||||
|
||||
config = AutoConfig.from_pretrained(
|
||||
model_args.config_name if model_args.config_name else model_args.model_name_or_path,
|
||||
cache_dir=model_args.cache_dir,
|
||||
revision=model_args.model_revision,
|
||||
use_auth_token=True if model_args.use_auth_token else None,
|
||||
)
|
||||
config.dropout_rate = 0.0
|
||||
tokenizer = AutoTokenizer.from_pretrained(
|
||||
model_args.tokenizer_name if model_args.tokenizer_name else model_args.model_name_or_path,
|
||||
cache_dir=model_args.cache_dir,
|
||||
use_fast=model_args.use_fast_tokenizer,
|
||||
revision=model_args.model_revision,
|
||||
use_auth_token=True if model_args.use_auth_token else None,
|
||||
)
|
||||
|
||||
if training_args.predict_with_generate:
|
||||
model = AutoModelForSeq2SeqLM.from_pretrained(
|
||||
model_args.model_name_or_path,
|
||||
from_tf=bool(".ckpt" in model_args.model_name_or_path),
|
||||
config=config,
|
||||
cache_dir=model_args.cache_dir,
|
||||
revision=model_args.model_revision,
|
||||
use_auth_token=True if model_args.use_auth_token else None,
|
||||
)
|
||||
else:
|
||||
model = AutoModelForMaskedLM.from_pretrained(
|
||||
model_args.model_name_or_path,
|
||||
from_tf=bool(".ckpt" in model_args.model_name_or_path),
|
||||
config=config,
|
||||
cache_dir=model_args.cache_dir,
|
||||
revision=model_args.model_revision,
|
||||
use_auth_token=True if model_args.use_auth_token else None,
|
||||
)
|
||||
model.resize_token_embeddings(len(tokenizer))
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
if delta_args.delta_type.lower() != "none":
|
||||
from opendelta import AutoDeltaConfig,AutoDeltaModel
|
||||
delta_config = AutoDeltaConfig.from_dict(vars(delta_args))
|
||||
delta_model = AutoDeltaModel.from_config(delta_config, backbone_model=model)
|
||||
delta_model.freeze_module(set_state_dict = True)
|
||||
delta_model.log(delta_ratio=True, trainable_ratio=True, visualization=True)
|
||||
|
||||
|
||||
# model parallelize
|
||||
if hasattr(training_args, "model_parallel") and training_args.model_parallel:
|
||||
logger.info('parallelize model!')
|
||||
model.parallelize()
|
||||
|
||||
data_args.dataset_name = [data_args.task_name]
|
||||
data_args.eval_dataset_name = [data_args.eval_dataset_name]
|
||||
data_args.test_dataset_name = [data_args.test_dataset_name]
|
||||
data_args.dataset_config_name = [data_args.dataset_config_name]
|
||||
data_args.eval_dataset_config_name = [data_args.eval_dataset_config_name]
|
||||
data_args.test_dataset_config_name = [data_args.test_dataset_config_name]
|
||||
assert len(data_args.dataset_name) == len(data_args.dataset_config_name)
|
||||
if data_args.eval_dataset_name is not None:
|
||||
assert len(data_args.eval_dataset_name) == len(data_args.eval_dataset_config_name)
|
||||
if data_args.test_dataset_name is not None:
|
||||
assert len(data_args.test_dataset_name) == len(data_args.test_dataset_config_name)
|
||||
|
||||
# Temporarily set max_target_length for training.
|
||||
#max_target_length = data_args.max_target_length
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
column_names = ['source', 'target', 'label', 'extra_fields']
|
||||
performance_metrics = {}
|
||||
|
||||
|
||||
|
||||
|
||||
def get_prompts(task, tokenizer, predict_with_generate, template_id="0", verbalizer_id="0"):
|
||||
# tid = getattr(config, "template_id", "0")
|
||||
# vid = getattr(config, "verbalizer_id", "0")
|
||||
from openpromptu.prompts import GenerationVerbalizer, ManualVerbalizer
|
||||
from openpromptu.prompts import ManualTemplate
|
||||
template = ManualTemplate(text = task.templates_text[template_id])
|
||||
if predict_with_generate:
|
||||
verbalizer = GenerationVerbalizer(tokenizer=tokenizer, classes = task.labels_list, label_words=task.verbalizers[verbalizer_id])
|
||||
else:
|
||||
verbalizer = ManualVerbalizer(tokenizer=tokenizer, classes = task.labels_list, label_words=task.verbalizers[verbalizer_id])
|
||||
# max_target_length = self.get_max_target_length(self.default_max_length)
|
||||
|
||||
from openpromptu import TokenizerWrapper
|
||||
tokenizer_wrapper = TokenizerWrapper(max_seq_length=data_args.max_source_length, tokenizer=tokenizer, truncate_method="balanced", mask_token_func=mask_token_func)
|
||||
return template, verbalizer, tokenizer_wrapper
|
||||
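# get_prompts returns the three objects the preprocess_function variants below rely on:
# `template` wraps a raw InputExample with the task's prompt text, `verbalizer` maps class labels
# to label words (generation-style for seq2seq models, ManualVerbalizer for MLM models), and
# `tokenizer_wrapper` merges the wrapped example into a single string, handling truncation and masking.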
|
||||
|
||||
from openpromptu.data_utils import InputExample
|
||||
|
||||
max_target_length = 32
|
||||
|
||||
if os.path.basename(model_args.model_name_or_path).startswith("t5"):
|
||||
mask_token_func = lambda i: tokenizer.additional_special_tokens[i]
|
||||
def preprocess_function(raw_example, **kwargs):
|
||||
# max_target_length += 1
|
||||
tokenizer = kwargs['tokenizer']
|
||||
data_args = kwargs['data_args']
|
||||
template = kwargs['template']
|
||||
verbalizer = kwargs['verbalizer']
|
||||
tokenizer_wrapper = kwargs['tokenizer_wrapper']
|
||||
split = kwargs['split']
|
||||
# extra_fileds = example['extra_fields']
|
||||
|
||||
example = InputExample(**raw_example)
|
||||
|
||||
# from collections import namedtuple
|
||||
# example['tgt_text'] = ""
|
||||
# example = namedtuple("ObjectName", example.keys())(*example.values())
|
||||
try:
|
||||
example = verbalizer.wrap_one_example(example)
|
||||
example, other = template.wrap_one_example(example)
|
||||
input_sentence = tokenizer_wrapper.merge_wrapped_example(example)
|
||||
model_inputs = tokenizer(input_sentence, max_length=256,
|
||||
padding="max_length", truncation=True)
|
||||
except:
|
||||
from IPython import embed; embed(header="Therer")
|
||||
|
||||
|
||||
# if split == "train":
|
||||
with tokenizer.as_target_tokenizer():
|
||||
label = tokenizer(other['tgt_text']).input_ids
|
||||
# label = [l if l != tokenizer.pad_token_id else -100 for l in label]
|
||||
|
||||
# from IPython import embed; embed(header="Therer")
|
||||
model_inputs["labels"] = label
|
||||
# else:
|
||||
# # from IPython import embed; embed(header="Therer")
|
||||
# model_inputs["tgt_text"] = other['tgt_text']
|
||||
# model_inputs['labels'] = None # model_inputs["extra_fields"] = extra_fileds
|
||||
# from IPython import embed; embed(header="Therer2")
|
||||
return model_inputs
|
||||
|
||||
def compute_metrics(eval_preds, tokenizer, dataset_name, eval_metric):
|
||||
# from IPython import embed; embed(header="In compute metrics")
|
||||
preds, labels = eval_preds
|
||||
decoded_preds = tokenizer.batch_decode(preds, skip_special_tokens=True)
|
||||
decoded_labels = tokenizer.batch_decode(labels, skip_special_tokens=True)
|
||||
# post_processor = .get(data_args.dataset_name[0], tokenizer,
|
||||
# data_args.ignore_pad_token_for_loss)
|
||||
# decoded_preds, decoded_labels = post_processor.process(preds, labels, data_info)
|
||||
result = {}
|
||||
for metric in eval_metric:
|
||||
result.update(metric(decoded_preds, decoded_labels))
|
||||
|
||||
average_metric = sum(result.values())/len(result)
|
||||
result.update({"average_metrics":average_metric})
|
||||
return result
|
||||
|
||||
|
||||
|
||||
elif os.path.basename(model_args.model_name_or_path).startswith("roberta") \
|
||||
or os.path.basename(model_args.model_name_or_path).startswith("bert"):
|
||||
mask_token_func = lambda i: tokenizer.mask_token
|
||||
def preprocess_function(raw_example, **kwargs):
|
||||
# max_target_length += 1
|
||||
|
||||
# from IPython import embed; embed(header="Therer")
|
||||
tokenizer = kwargs['tokenizer']
|
||||
|
||||
data_args = kwargs['data_args']
|
||||
template = kwargs['template']
|
||||
verbalizer = kwargs['verbalizer']
|
||||
tokenizer_wrapper = kwargs['tokenizer_wrapper']
|
||||
|
||||
example = InputExample(**raw_example)
|
||||
|
||||
# from collections import namedtuple
|
||||
# example['tgt_text'] = ""
|
||||
# example = namedtuple("ObjectName", example.keys())(*example.values())
|
||||
# try:
|
||||
# example = verbalizer.wrap_one_example(example)
|
||||
example, other = template.wrap_one_example(example)
|
||||
input_sentence = tokenizer_wrapper.merge_wrapped_example(example)
|
||||
model_inputs = tokenizer(input_sentence, max_length=256,
|
||||
padding="max_length", truncation=True)
|
||||
|
||||
|
||||
|
||||
# print("max_length", data_args.max_source_length)
|
||||
# model_inputs = tokenizer(examples['source'], max_length=data_args.max_source_length,
|
||||
# padding="max_length", truncation=True)
|
||||
|
||||
# mask_position = [(id, input_id.index(tokenizer.mask_token_id)) for id, input_id in enumerate(model_inputs.input_ids)]# [[-100 if i != tokenizer.mask_token_id else tokenizer.convert_tokens_to_ids(target) for i in input_id] for input_id, target in zip(model_inputs.input_ids, examples['target'])]
|
||||
# model_inputs["mask_position"] = mask_position
|
||||
# model_inputs["extra_fields"] = examples['extra_fields']
|
||||
# from IPython import embed; embed(header="Therer")
|
||||
return model_inputs
|
||||
|
||||
def compute_metrics(eval_preds, dataset_name):
|
||||
# from IPython import embed; embed(header="In compute metrics")
|
||||
|
||||
preds, labels = eval_preds.predictions, eval_preds.label_ids
|
||||
|
||||
preds = np.argmax(preds, axis=-1)
|
||||
|
||||
result = {}
|
||||
average_metrics = []
|
||||
for metric in eval_metric:
|
||||
metric_item = metric(preds, labels)
|
||||
metric_value = list(metric_item.values())
|
||||
result.update(metric_item)
|
||||
average_metrics.extend(metric_value)
|
||||
print("average:",average_metrics)
|
||||
average_metric = sum(average_metrics)/len(average_metrics)
|
||||
result.update({"average_metrics":average_metric})
|
||||
return result
|
||||
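# Note: in this MLM branch `eval_metric` is not a parameter of compute_metrics; it is resolved from
# the enclosing scope, where it is set to eval_task.metric once the "test" split is prepared below.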
if training_args.do_train:
|
||||
|
||||
train_task = AutoTask.get(data_args.task_name,
|
||||
data_args.dataset_config_name,
|
||||
data_args=data_args,
|
||||
# tokenizer=tokenizer,
|
||||
# predict_with_generate=training_args.predict_with_generate,
|
||||
seed=data_args.data_seed)
|
||||
|
||||
train_dataset = train_task.get(split='train',
|
||||
split_validation_test=training_args.split_validation_test,
|
||||
n_obs=data_args.max_train_samples)
|
||||
|
||||
template, verbalizer, tokenizer_wrapper = get_prompts(train_task, tokenizer, training_args.predict_with_generate)
|
||||
|
||||
|
||||
|
||||
train_dataset = train_dataset.map(
|
||||
functools.partial(preprocess_function,
|
||||
data_args=data_args,
|
||||
tokenizer=tokenizer,
|
||||
template=template,
|
||||
verbalizer=verbalizer,
|
||||
tokenizer_wrapper=tokenizer_wrapper,
|
||||
split="train"),
|
||||
batched=False,
|
||||
num_proc=data_args.preprocessing_num_workers,
|
||||
remove_columns=[x for x in train_dataset.features if x not in ("label",)], # if train_dataset != "superglue-record" else column_names+["answers"],
|
||||
load_from_cache_file=not data_args.overwrite_cache,
|
||||
)
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
eval_splits_names = []
|
||||
|
||||
if training_args.do_eval:
|
||||
eval_splits_names.append("eval")
|
||||
if training_args.do_test:
|
||||
eval_splits_names.append("test")
|
||||
eval_splits = {}
|
||||
for split_name in eval_splits_names:
|
||||
eval_task = AutoTask.get(data_args.task_name,
|
||||
data_args.dataset_config_name,
|
||||
data_args=data_args,
|
||||
# tokenizer=tokenizer,
|
||||
# predict_with_generate=training_args.predict_with_generate,
|
||||
seed=data_args.data_seed)
|
||||
# for dataset_name, dataset_config_name\
|
||||
# in zip(getattr(data_args,f"{split_name}_dataset_name"), getattr(data_args, f"{split_name}_dataset_config_name"))}
|
||||
|
||||
eval_dataset = eval_task.get(split=split_name,
|
||||
split_validation_test=training_args.split_validation_test,
|
||||
n_obs=data_args.max_train_samples)
|
||||
|
||||
|
||||
|
||||
template, _verbalizer, tokenizer_wrapper = get_prompts(eval_task, tokenizer, training_args.predict_with_generate)
|
||||
|
||||
eval_dataset = eval_dataset.map(
|
||||
functools.partial(preprocess_function,
|
||||
data_args=data_args,
|
||||
tokenizer=tokenizer,
|
||||
template=template,
|
||||
verbalizer=_verbalizer,
|
||||
tokenizer_wrapper=tokenizer_wrapper,
|
||||
split=split_name),
|
||||
batched=False,
|
||||
num_proc=data_args.preprocessing_num_workers,
|
||||
remove_columns=[x for x in eval_dataset.features if x not in ("label",)], # if train_dataset != "superglue-record" else column_names+["answers"],
|
||||
load_from_cache_file=not data_args.overwrite_cache,
|
||||
)
|
||||
|
||||
|
||||
eval_splits[split_name] = eval_dataset
|
||||
if split_name == "test":
|
||||
eval_metric = eval_task.metric
|
||||
verbalizer = _verbalizer
|
||||
|
||||
|
||||
|
||||
class MLMTrainer(Trainer):
|
||||
def __init__(self, verbalizer=None, **kwargs):
|
||||
super().__init__(**kwargs)
|
||||
self.verbalizer=verbalizer
|
||||
|
||||
# def training_step(self, model, inputs):
|
||||
# from IPython import embed; embed(header="in trainstep")
|
||||
# return super().training_step(model, inputs)
|
||||
def compute_loss(self, model, inputs, return_outputs=False):
|
||||
|
||||
labels = inputs.pop('labels')
|
||||
# extra_fields = inputs.pop("extra_fields")
|
||||
outputs = model(**inputs)
|
||||
logits = outputs.get("logits")
|
||||
input_ids = inputs['input_ids']
|
||||
|
||||
|
||||
|
||||
# from IPython import embed; embed(header="382")
|
||||
verbalizer = self.verbalizer.cuda()
|
||||
logits_at_mask = logits[torch.where(input_ids == verbalizer.tokenizer.mask_token_id)]
|
||||
label_logits = verbalizer.process_logits(logits_at_mask)
|
||||
loss_fct = torch.nn.CrossEntropyLoss()
|
||||
# from IPython import embed; embed(header="In compute loss")
|
||||
loss = loss_fct(label_logits, labels)
|
||||
outputs.logits = label_logits
|
||||
return (loss, outputs) if return_outputs else loss
|
||||
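# A self-contained sketch (toy tensors and made-up token ids, not the verbalizer API above) of what
# MLMTrainer.compute_loss does: gather the LM logits at each sequence's [MASK] position, keep only
# the logits of the label words, and apply cross-entropy over those label-word logits.
import torch

batch_size, seq_len, vocab_size = 2, 6, 30522
mask_token_id = 103                              # BERT-style [MASK] id (illustrative)
label_word_ids = torch.tensor([2748, 2053])      # ids standing in for "yes"/"no" (illustrative)

logits = torch.randn(batch_size, seq_len, vocab_size)
input_ids = torch.full((batch_size, seq_len), 1)
input_ids[:, 3] = mask_token_id                  # one mask per sequence
labels = torch.tensor([0, 1])

logits_at_mask = logits[torch.where(input_ids == mask_token_id)]  # (batch_size, vocab_size)
label_logits = logits_at_mask[:, label_word_ids]                  # (batch_size, num_label_words)
loss = torch.nn.CrossEntropyLoss()(label_logits, labels)
print(label_logits.shape, loss.item())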
|
||||
|
||||
class MySeq2SeqTrainer(Seq2SeqTrainer):
|
||||
def compute_loss(self, model, inputs, return_outputs=False):
|
||||
# from IPython import embed; embed(header="agag")
|
||||
|
||||
intlabel = inputs.pop('label')
|
||||
# extra_fields = inputs.pop("extra_fields")
|
||||
outputs = model(**inputs)
|
||||
# logits = outputs.get("logits")
|
||||
# input_ids = inputs['input_ids']
|
||||
|
||||
|
||||
|
||||
# # from IPython import embed; embed(header="382")
|
||||
# verbalizer = self._verbalizers.cuda()
|
||||
# logits_at_mask = logits[torch.where(input_ids == verbalizer.tokenizer.mask_token_id)]
|
||||
# label_logits = verbalizer.process_logits(logits_at_mask)
|
||||
# loss_fct = torch.nn.CrossEntropyLoss()
|
||||
# # from IPython import embed; embed(header="In compute loss")
|
||||
# loss = loss_fct(label_logits, labels)
|
||||
# outputs.logits = label_logits
|
||||
if return_outputs:
|
||||
return (outputs.loss, outputs)
|
||||
else:
|
||||
return outputs.loss
|
||||
|
||||
|
||||
# def evaluate(
|
||||
# self,
|
||||
# eval_dataset: Optional[Dict[str, Dataset]] = None,
|
||||
# ignore_keys: Optional[List[str]] = None,
|
||||
# metric_key_prefix: str = "eval",
|
||||
# max_length: Optional[int] = None,
|
||||
# num_beams: Optional[int] = None,
|
||||
# ) -> Dict[str, float]:
|
||||
# # TODO: this also needs to be set per dataset
|
||||
# self._max_length = max_length
|
||||
# self._num_beams = num_beams
|
||||
# return super().evaluate(eval_dataset, ignore_keys=ignore_keys, metric_key_prefix=metric_key_prefix)
|
||||
|
||||
|
||||
def prediction_step(
|
||||
self,
|
||||
model, #nn.Module,
|
||||
inputs, #Dict[str, Union[torch.Tensor, Any]],
|
||||
prediction_loss_only, #: bool,
|
||||
ignore_keys, #: Optional[List[str]] = None,
|
||||
): #-> Tuple[Optional[float], Optional[torch.Tensor], Optional[torch.Tensor]]:
|
||||
"""
|
||||
Perform an evaluation step on :obj:`model` using obj:`inputs`.
|
||||
|
||||
Subclass and override to inject custom behavior.
|
||||
|
||||
Args:
|
||||
model (:obj:`nn.Module`):
|
||||
The model to evaluate.
|
||||
inputs (:obj:`Dict[str, Union[torch.Tensor, Any]]`):
|
||||
The inputs and targets of the model.
|
||||
|
||||
The dictionary will be unpacked before being fed to the model. Most models expect the targets under the
|
||||
argument :obj:`labels`. Check your model's documentation for all accepted arguments.
|
||||
prediction_loss_only (:obj:`bool`):
|
||||
Whether or not to return the loss only.
|
||||
|
||||
Return:
|
||||
Tuple[Optional[float], Optional[torch.Tensor], Optional[torch.Tensor]]: A tuple with the loss, logits and
|
||||
labels (each being optional).
|
||||
"""
|
||||
if not self.args.predict_with_generate or prediction_loss_only:
|
||||
return super().prediction_step(
|
||||
model, inputs, prediction_loss_only=prediction_loss_only, ignore_keys=ignore_keys
|
||||
)
|
||||
|
||||
|
||||
has_labels = "labels" in inputs
|
||||
inputs = self._prepare_inputs(inputs)
|
||||
intlabel = inputs.pop('label')
|
||||
gen_kwargs = {
|
||||
"max_length": 10, # self._max_length if s is not None else self.model.config.max_length,
|
||||
"num_beams": 1 #self._num_beams if self._num_beams is not None else self.model.config.num_beams,
|
||||
}
|
||||
generated_tokens = self.model.generate(
|
||||
inputs["input_ids"],
|
||||
attention_mask=inputs["attention_mask"],
|
||||
**gen_kwargs,
|
||||
)
|
||||
# in case the batch is shorter than max length, the output should be padded
|
||||
if generated_tokens.shape[-1] < gen_kwargs["max_length"]:
|
||||
generated_tokens = self._pad_tensors_to_max_len(generated_tokens, gen_kwargs["max_length"])
|
||||
|
||||
with torch.no_grad():
|
||||
|
||||
outputs = model(**inputs)
|
||||
if has_labels:
|
||||
if self.label_smoother is not None:
|
||||
loss = self.label_smoother(outputs, inputs["labels"]).mean().detach()
|
||||
else:
|
||||
loss = (outputs["loss"] if isinstance(outputs, dict) else outputs[0]).mean().detach()
|
||||
else:
|
||||
loss = None
|
||||
|
||||
if self.args.prediction_loss_only:
|
||||
return (loss, None, None)
|
||||
|
||||
labels = inputs["labels"]
|
||||
if labels.shape[-1] < gen_kwargs["max_length"]:
|
||||
labels = self._pad_tensors_to_max_len(labels, gen_kwargs["max_length"])
|
||||
|
||||
# from IPython import embed; embed(header="In seqseqtrainer")
|
||||
return (loss, generated_tokens, labels)
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
# def prediction_step(self, model, inputs, prediction_loss_only, ignore_keys):
|
||||
# aa = super().prediction_step(model, inputs, prediction_loss_only, ignore_keys)
|
||||
# # from IPython import embed; embed()
|
||||
# return aa
|
||||
# from transformers.data.data_collator import torch_default_data_collator , DataCollatorMixin
|
||||
# class DataCollatorWithExtraFields(DataCollatorMixin):
|
||||
# return_tensors: str = "pt"
|
||||
# def torch_call(self, features):
|
||||
# # print(len(features))
|
||||
# # extra_fields = [f.pop('extra_fields') for f in features]
|
||||
# batch = torch_default_data_collator(features)
|
||||
# batch['extra_fields'] =extra_fields
|
||||
# # print(batch['input_ids'].size())
|
||||
# # print(batch['labels'].size())
|
||||
# return batch
|
||||
|
||||
|
||||
# from transformers.data.data_collator import DefaultDataCollator
|
||||
# class CustomDataCollator(DefaultDataCollator):
|
||||
|
||||
# def __call__(self, features):
|
||||
# mask_position = [d.pop('mask_position') for d in features]
|
||||
# # self.check_uniqueness(tasks)
|
||||
# from IPython import embed; embed(header="featurres")
|
||||
# output = super().__call__(features)
|
||||
# # mask_positions = [d.pop('mask_position') for d in features]
|
||||
# output["mask_position"] = mask_position
|
||||
# return output
|
||||
|
||||
|
||||
training_args.remove_unused_columns = False
|
||||
|
||||
if os.path.basename(model_args.model_name_or_path).startswith("roberta") or \
|
||||
os.path.basename(model_args.model_name_or_path).startswith("bert"):
|
||||
trainer = MLMTrainer(
|
||||
model=model,
|
||||
args=training_args,
|
||||
train_dataset=train_dataset if training_args.do_train else None,
|
||||
eval_dataset=eval_splits['eval'] if training_args.do_eval else None,
|
||||
compute_metrics=functools.partial(compute_metrics, dataset_name=data_args.task_name),
|
||||
tokenizer=tokenizer,
|
||||
# data_collator=DataCollatorWithExtraFields(),
|
||||
verbalizer=verbalizer,
|
||||
)
|
||||
elif os.path.basename(model_args.model_name_or_path).startswith("t5"):
|
||||
trainer = MySeq2SeqTrainer(
|
||||
model=model,
|
||||
args=training_args,
|
||||
train_dataset=train_dataset if training_args.do_train else None,
|
||||
eval_dataset=eval_splits['eval'] if training_args.do_eval else None,
|
||||
compute_metrics=functools.partial(compute_metrics, tokenizer=tokenizer, dataset_name=data_args.task_name, eval_metric=eval_metric),
|
||||
tokenizer=tokenizer,
|
||||
data_collator=DataCollatorForSeq2Seq(tokenizer=tokenizer),
|
||||
)
|
||||
|
||||
|
||||
|
||||
# Saves training config.
|
||||
if trainer.is_world_process_zero():
|
||||
os.makedirs(training_args.output_dir, exist_ok=True)
|
||||
save_training_config(sys.argv[1], training_args.output_dir)
|
||||
|
||||
# Training
|
||||
if training_args.do_train:
|
||||
checkpoint = None
|
||||
if training_args.resume_from_checkpoint is not None:
|
||||
checkpoint = training_args.resume_from_checkpoint
|
||||
elif last_checkpoint is not None:
|
||||
checkpoint = last_checkpoint
|
||||
|
||||
if training_args.compute_time:
|
||||
torch.cuda.synchronize() # wait for move to complete
|
||||
start = torch.cuda.Event(enable_timing=True)
|
||||
end = torch.cuda.Event(enable_timing=True)
|
||||
start.record()
|
||||
|
||||
train_result = trainer.train(resume_from_checkpoint=checkpoint)
|
||||
|
||||
if training_args.compute_time:
|
||||
end.record()
|
||||
torch.cuda.synchronize() # wait for all_reduce to complete
|
||||
total_time = start.elapsed_time(end)/(1000*60)
|
||||
performance_metrics.update({"total_time in minutes ": total_time})
|
||||
|
||||
trainer.save_model() # Saves the tokenizer too for easy upload
|
||||
train_metrics = train_result.metrics
|
||||
max_train_samples = (
|
||||
data_args.max_train_samples if data_args.max_train_samples is not None else len(train_dataset)
|
||||
)
|
||||
train_metrics["train_samples"] = min(max_train_samples, len(train_dataset))
|
||||
trainer.log_metrics("train", train_metrics)
|
||||
trainer.save_metrics("train", train_metrics)
|
||||
trainer.save_state()
|
||||
|
||||
if torch.cuda.is_available() and training_args.compute_memory:
|
||||
peak_memory = (torch.cuda.max_memory_allocated() / 1024 ** 2)/1000
|
||||
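# max_memory_allocated() reports bytes; /1024**2 converts to MiB and the extra /1000 gives an
# approximate GB figure, which is what the "GB" label printed below refers to.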
print(
|
||||
"Memory utilization",
|
||||
peak_memory,
|
||||
"GB"
|
||||
)
|
||||
performance_metrics.update({"peak_memory": peak_memory})
|
||||
if training_args.compute_memory or training_args.compute_time:
|
||||
print(performance_metrics)
|
||||
trainer.save_metrics("performance", performance_metrics)
|
||||
|
||||
# Evaluation
|
||||
all_results = {}
|
||||
|
||||
all_results['evaluate'] = {}
|
||||
|
||||
if training_args.do_eval:
|
||||
logger.info("*** Evaluate ***")
|
||||
|
||||
metrics = trainer.evaluate(eval_dataset=eval_splits['eval'],
|
||||
)
|
||||
trainer.log_metrics(f"{data_args.task_name}_eval", metrics)
|
||||
trainer.save_metrics(f"{data_args.task_name}_eval", metrics)
|
||||
all_results['evaluate'][data_args.task_name] = metrics
|
||||
|
||||
# Test
|
||||
all_results['test'] = {}
|
||||
if training_args.do_test:
|
||||
logger.info("*** Test ***")
|
||||
metrics = trainer.evaluate(eval_dataset=eval_splits['test'],
|
||||
metric_key_prefix="test"
|
||||
)
|
||||
trainer.log_metrics(f"{data_args.task_name}_test", metrics)
|
||||
trainer.save_metrics(f"{data_args.task_name}_test", metrics)
|
||||
all_results['test'][data_args.task_name] = metrics
|
||||
|
||||
# repo_name = create_hub_repo_name(root="DeltaHub",
|
||||
# dataset=data_args.task_name,
|
||||
# delta_type = delta_args.delta_type,
|
||||
# model_name_or_path= model_args.model_name_or_path)
|
||||
# results['repo_name'] = repo_name
|
||||
# if delta_args.delta_type.lower() != "none":
|
||||
# if training_args.push_to_hub: # TODO add description here
|
||||
# delta_model.save_finetuned(push_to_hub=True, save_directory=repo_name, use_auth_token=True)
|
||||
# # trainer.push_to_hub(**kwargs)
|
||||
# else:
|
||||
# delta_model.save_finetuned(push_to_hub=False, save_directory=repo_name, use_auth_token=True)
|
||||
|
||||
|
||||
with open(f"{training_args.output_dir}/results.json", 'w') as fout:
|
||||
string = json.dumps(all_results, indent=4,sort_keys=True)
|
||||
fout.write(string+"\n")
|
||||
|
||||
return all_results
|
||||
|
||||
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
result = main()
|
||||
|
|
@@ -0,0 +1,323 @@
|
|||
# coding=utf-8
|
||||
# Copyright OpenDelta Team and THUNLP lab. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
"""
|
||||
A unified running script for most models to do downstream tasks in a
prompt-learning fashion, i.e., no classification head; all tasks are cast
to mask prediction or span prediction tasks.

Processing relevant to the different backbone models is stored in ../backbones/

A few lines are added to integrate the delta tuning methods.

You can also adapt this script to your own tasks.
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
os.environ['MKL_THREADING_LAYER'] = 'GNU'
|
||||
os.environ['MKL_SERVICE_FORCE_INTEL'] = '1'
|
||||
os.environ["TOKENIZERS_PARALLELISM"] = "false"
|
||||
sys.path.append(os.path.join(os.getcwd(), "../"))
|
||||
sys.path.append(os.path.join(os.getcwd()))
|
||||
|
||||
import functools
|
||||
import logging
|
||||
import torch
|
||||
import json
|
||||
import numpy as np
|
||||
|
||||
import transformers
|
||||
from transformers import (
|
||||
AutoConfig,
|
||||
AutoModelForMaskedLM,
|
||||
AutoModelForSeq2SeqLM,
|
||||
AutoTokenizer,
|
||||
DataCollatorForSeq2Seq,
|
||||
# HfArgumentParser,
|
||||
# MBartTokenizer,
|
||||
# default_data_collator,
|
||||
Trainer,
|
||||
Seq2SeqTrainer,
|
||||
set_seed,
|
||||
)
|
||||
from transformers.trainer_utils import is_main_process, get_last_checkpoint
|
||||
|
||||
from data_processors import AutoTask #, #TaskDataCollatorForSeq2Seq, AutoPostProcessor, data_collator
|
||||
from utils import read_json, save_json
|
||||
from utils.args import ModelArguments, TrainingArguments, DataTrainingArguments, RemainArgHfArgumentParser
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def main():
|
||||
# See all possible arguments in src/transformers/training_args.py
|
||||
# or by passing the --help flag to this script.
|
||||
# We now keep distinct sets of args, for a cleaner separation of concerns.
|
||||
parser = RemainArgHfArgumentParser((ModelArguments, DataTrainingArguments, TrainingArguments))
|
||||
if len(sys.argv) == 2 and sys.argv[1].endswith(".json"):
|
||||
# If we pass only one argument to the script and it's the path to a json file,
|
||||
# let's parse it to get our arguments.
|
||||
model_args, data_args, training_args, delta_args = parser.parse_json_file(json_file=os.path.abspath(sys.argv[1]))
|
||||
else:
|
||||
model_args, data_args, training_args, delta_args = parser.parse_args_into_dataclasses(return_remaining_strings=True)
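# Usage sketch (file and config names here are hypothetical): the script is typically
# launched with a single JSON config, e.g. `python run.py configs/my_experiment.json`;
# keys that do not belong to the three dataclasses above are returned as the remaining
# (delta) arguments.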
|
||||
|
||||
|
||||
print(f"{training_args.output_dir}/results.json")
|
||||
# exit()
|
||||
# Detecting last checkpoint.
|
||||
last_checkpoint = None
|
||||
if os.path.isdir(training_args.output_dir) and training_args.do_train and not training_args.overwrite_output_dir:
|
||||
last_checkpoint = get_last_checkpoint(training_args.output_dir)
|
||||
print("#### last_checkpoint ", last_checkpoint)
|
||||
if last_checkpoint is None and len(os.listdir(training_args.output_dir)) > 0:
|
||||
'''
|
||||
raise ValueError(
|
||||
f"Output directory ({training_args.output_dir}) already exists and is not empty. "
|
||||
"Use --overwrite_output_dir to overcome."
|
||||
)
|
||||
'''
|
||||
pass
|
||||
elif last_checkpoint is not None:
|
||||
logger.info(
|
||||
f"Checkpoint detected, resuming training at {last_checkpoint}. To avoid this behavior, change "
|
||||
"the `--output_dir` or add `--overwrite_output_dir` to train from scratch."
|
||||
)
|
||||
|
||||
# Setup logging
|
||||
logging.basicConfig(
|
||||
format="%(asctime)s - %(levelname)s - %(name)s - %(message)s",
|
||||
datefmt="%m/%d/%Y %H:%M:%S",
|
||||
handlers=[logging.StreamHandler(sys.stdout)],
|
||||
)
|
||||
logger.setLevel(logging.INFO if is_main_process(training_args.local_rank) else logging.WARN)
|
||||
|
||||
# Log on each process the small summary:
|
||||
logger.warning(
|
||||
f"Process rank: {training_args.local_rank}, device: {training_args.device}, n_gpu: {training_args.n_gpu}"
|
||||
+ f"distributed training: {bool(training_args.local_rank != -1)}, 16-bits training: {training_args.fp16}"
|
||||
)
|
||||
# Set the verbosity to info of the Transformers logger (on main process only):
|
||||
if is_main_process(training_args.local_rank):
|
||||
transformers.utils.logging.set_verbosity_info()
|
||||
# logger.info("Training/evaluation parameters %s", training_args, model_args, data_args, delta_args)
|
||||
logger.info("{}\n{}\n{}\n{}".format(training_args, model_args, data_args, delta_args))
|
||||
|
||||
|
||||
# Set seed before initializing model.
|
||||
set_seed(training_args.seed)
|
||||
|
||||
|
||||
|
||||
if os.path.basename(model_args.model_name_or_path).startswith("t5"):
|
||||
from examples_prompt.backbones.t5 import get_backbone, preprocess_function, mask_token_func, get_remove_columns, get_prompts
|
||||
from examples_prompt.backbones.t5 import Trainer, DataCollator
|
||||
elif os.path.basename(model_args.model_name_or_path).startswith("roberta") \
|
||||
or os.path.basename(model_args.model_name_or_path).startswith("bert") \
|
||||
or os.path.basename(model_args.model_name_or_path).startswith("albert") :
|
||||
from examples_prompt.backbones.bert import get_backbone, preprocess_function, mask_token_func, get_remove_columns, get_prompts
|
||||
from examples_prompt.backbones.bert import Trainer, DataCollator
|
||||
elif os.path.basename(model_args.model_name_or_path).startswith("beit"):
|
||||
from examples_prompt.backbones.beit import get_backbone, preprocess_function, mask_token_func, get_remove_columns, get_prompts
|
||||
from examples_prompt.backbones.beit import Trainer, DataCollator
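# The helper modules are chosen purely by the model-name prefix: for example (standard
# HuggingFace identifiers) "t5-base" resolves to backbones.t5, while "roberta-large",
# "bert-base-cased" or "albert-base-v2" resolve to backbones.bert and "beit-..." models
# to backbones.beit; a name outside these prefixes would leave the helpers undefined.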
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
config, tokenizer, model = get_backbone(model_args=model_args)
|
||||
|
||||
if delta_args.delta_type.lower() != "none":
|
||||
from opendelta import AutoDeltaConfig,AutoDeltaModel
|
||||
delta_config = AutoDeltaConfig.from_dict(vars(delta_args))
|
||||
delta_model = AutoDeltaModel.from_config(delta_config, backbone_model=model)
|
||||
delta_model.freeze_module(set_state_dict = True)
|
||||
delta_model.log(delta_ratio=True, trainable_ratio=True, visualization=True)
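# Sketch (assumed field values): delta_args is turned into a plain dict, so a config
# along the lines of {"delta_type": "lora", ...} builds the corresponding delta model,
# freezes the backbone parameters (keeping only the delta/unfrozen modules trainable),
# and prints a parameter/structure summary via delta_model.log.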
|
||||
|
||||
# model parallelize
|
||||
if hasattr(training_args, "model_parallel") and training_args.model_parallel:
|
||||
logger.info('parallelize model!')
|
||||
model.parallelize()
|
||||
|
||||
|
||||
|
||||
performance_metrics = {}
|
||||
|
||||
|
||||
|
||||
|
||||
non_empty_splits_names = []
|
||||
if training_args.do_train:
|
||||
non_empty_splits_names.append("train")
|
||||
if training_args.do_eval:
|
||||
non_empty_splits_names.append("eval")
|
||||
if training_args.do_test:
|
||||
non_empty_splits_names.append("test")
|
||||
splits = {}
|
||||
for split_name in ['train', 'eval', 'test']:
|
||||
if split_name not in non_empty_splits_names:
|
||||
splits[split_name] = None
|
||||
continue
|
||||
|
||||
task = AutoTask.get(data_args.task_name,
|
||||
data_args.dataset_config_name,
|
||||
data_args=data_args,
|
||||
seed=data_args.data_seed)
|
||||
|
||||
dataset = task.get(split=split_name,
|
||||
split_validation_test=training_args.split_validation_test,
|
||||
n_obs=data_args.max_train_samples)
|
||||
|
||||
|
||||
|
||||
template, _verbalizer, tokenizer_wrapper = get_prompts(task, tokenizer, training_args)
|
||||
|
||||
|
||||
dataset = dataset.map(
|
||||
functools.partial(preprocess_function,
|
||||
data_args=data_args,
|
||||
tokenizer=tokenizer,
|
||||
template=template,
|
||||
verbalizer=_verbalizer,
|
||||
tokenizer_wrapper=tokenizer_wrapper,
|
||||
split=split_name),
|
||||
batched=False,
|
||||
num_proc=data_args.preprocessing_num_workers,
|
||||
remove_columns=get_remove_columns(list(dataset.features.keys())),
|
||||
load_from_cache_file=not data_args.overwrite_cache,
|
||||
)
|
||||
# from IPython import embed; embed()
|
||||
splits[split_name] = dataset
|
||||
if split_name == "eval":
|
||||
eval_task = task
|
||||
verbalizer = _verbalizer
|
||||
|
||||
|
||||
|
||||
trainer = Trainer(
|
||||
model=model,
|
||||
verbalizer=verbalizer,
|
||||
eval_task=eval_task,
|
||||
args=training_args,
|
||||
train_dataset=splits['train'],
|
||||
eval_dataset=splits['eval'],
|
||||
tokenizer=tokenizer,
|
||||
data_collator=DataCollator(tokenizer),
|
||||
)
|
||||
|
||||
|
||||
def save_training_config(config_file, output_dir):
|
||||
json_data = read_json(config_file)
|
||||
save_json(os.path.join(output_dir, "training_config.json"), json_data)
|
||||
|
||||
|
||||
# Saves training config.
|
||||
if trainer.is_world_process_zero():
|
||||
save_training_config(sys.argv[1], training_args.output_dir)
|
||||
|
||||
# Training
|
||||
if training_args.do_train:
|
||||
checkpoint = None
|
||||
if training_args.resume_from_checkpoint is not None:
|
||||
checkpoint = training_args.resume_from_checkpoint
|
||||
elif last_checkpoint is not None:
|
||||
checkpoint = last_checkpoint
|
||||
|
||||
if training_args.compute_time:
|
||||
torch.cuda.synchronize() # wait for move to complete
|
||||
start = torch.cuda.Event(enable_timing=True)
|
||||
end = torch.cuda.Event(enable_timing=True)
|
||||
start.record()
|
||||
|
||||
train_result = trainer.train(resume_from_checkpoint=checkpoint)
|
||||
|
||||
if training_args.compute_time:
|
||||
end.record()
|
||||
torch.cuda.synchronize() # wait for all_reduce to complete
|
||||
total_time = start.elapsed_time(end)/(1000*60)
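# torch.cuda.Event.elapsed_time returns milliseconds, so /(1000*60) converts the
# measured wall-clock training time to minutes before it is recorded in performance_metrics.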
|
||||
performance_metrics.update({"total_time in minutes ": total_time})
|
||||
|
||||
trainer.save_model() # Saves the tokenizer too for easy upload
|
||||
train_metrics = train_result.metrics
|
||||
max_train_samples = (
|
||||
data_args.max_train_samples if data_args.max_train_samples is not None else len(splits['train'])
|
||||
)
|
||||
train_metrics["train_samples"] = min(max_train_samples, len(splits['train']))
|
||||
trainer.log_metrics("train", train_metrics)
|
||||
trainer.save_metrics("train", train_metrics)
|
||||
trainer.save_state()
|
||||
|
||||
if torch.cuda.is_available() and training_args.compute_memory:
|
||||
peak_memory = (torch.cuda.max_memory_allocated() / 1024 ** 2)/1000
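# max_memory_allocated() reports bytes; /1024**2 gives MiB and the extra /1000 an
# approximate GB figure, which is what the print below labels as "GB".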
|
||||
print(
|
||||
"Memory utilization",
|
||||
peak_memory,
|
||||
"GB"
|
||||
)
|
||||
performance_metrics.update({"peak_memory": peak_memory})
|
||||
if training_args.compute_memory or training_args.compute_time:
|
||||
print("Efficiency Statistics {}".format(performance_metrics))
|
||||
trainer.save_metrics("performance", performance_metrics)
|
||||
|
||||
# Evaluation
|
||||
all_results = {}
|
||||
|
||||
all_results['evaluate'] = {}
|
||||
|
||||
if training_args.do_eval:
|
||||
logger.info("*** Evaluate ***")
|
||||
|
||||
metrics = trainer.evaluate(eval_dataset=splits['eval'],
|
||||
)
|
||||
trainer.log_metrics(f"{data_args.task_name}_eval", metrics)
|
||||
trainer.save_metrics(f"{data_args.task_name}_eval", metrics)
|
||||
all_results['evaluate'][data_args.task_name] = metrics
|
||||
|
||||
# Test
|
||||
all_results['test'] = {}
|
||||
if training_args.do_test:
|
||||
logger.info("*** Test ***")
|
||||
metrics = trainer.evaluate(eval_dataset=splits['test'],
|
||||
metric_key_prefix="test"
|
||||
)
|
||||
trainer.log_metrics(f"{data_args.task_name}_test", metrics)
|
||||
trainer.save_metrics(f"{data_args.task_name}_test", metrics)
|
||||
all_results['test'][data_args.task_name] = metrics
|
||||
|
||||
# repo_name = create_hub_repo_name(root="DeltaHub",
|
||||
# dataset=data_args.task_name,
|
||||
# delta_type = delta_args.delta_type,
|
||||
# model_name_or_path= model_args.model_name_or_path)
|
||||
# results['repo_name'] = repo_name
|
||||
# if delta_args.delta_type.lower() != "none":
|
||||
# if training_args.push_to_hub: # TODO add description here
|
||||
# delta_model.save_finetuned(push_to_hub=True, save_directory=repo_name, use_auth_token=True)
|
||||
# # trainer.push_to_hub(**kwargs)
|
||||
# else:
|
||||
# delta_model.save_finetuned(push_to_hub=False, save_directory=repo_name, use_auth_token=True)
|
||||
|
||||
|
||||
with open(f"{training_args.output_dir}/results.json", 'w') as fout:
|
||||
string = json.dumps(all_results, indent=4,sort_keys=True)
|
||||
fout.write(string+"\n")
|
||||
|
||||
return all_results
|
||||
|
||||
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
result = main()
|
||||
|
|
@@ -1,2 +0,0 @@
|
|||
from .trainer import BaseTrainer
|
||||
from .seq2seq_trainer import Seq2SeqTrainer
|
|
@@ -1,36 +0,0 @@
|
|||
from dataclasses import dataclass, field
|
||||
from typing import Optional, List
|
||||
|
||||
@dataclass
|
||||
class ModelArguments:
|
||||
"""
|
||||
Arguments pertaining to which model/config/tokenizer we are going to fine-tune from.
|
||||
"""
|
||||
model_name_or_path: str = field(
|
||||
metadata={"help": "Path to pretrained model or model identifier from huggingface.co/models"}
|
||||
)
|
||||
config_name: Optional[str] = field(
|
||||
default=None, metadata={"help": "Pretrained config name or path if not the same as model_name"}
|
||||
)
|
||||
tokenizer_name: Optional[str] = field(
|
||||
default=None, metadata={"help": "Pretrained tokenizer name or path if not the same as model_name"}
|
||||
)
|
||||
cache_dir: Optional[str] = field(
|
||||
default=None,
|
||||
metadata={"help": "Where to store the pretrained models downloaded from huggingface.co"},
|
||||
)
|
||||
use_fast_tokenizer: bool = field(
|
||||
default=True,
|
||||
metadata={"help": "Whether to use one of the fast tokenizer (backed by the tokenizers library) or not."},
|
||||
)
|
||||
model_revision: str = field(
|
||||
default="main",
|
||||
metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."},
|
||||
)
|
||||
use_auth_token: bool = field(
|
||||
default=False,
|
||||
metadata={
|
||||
"help": "Will use the token generated when running `transformers-cli login` (necessary to use this script "
|
||||
"with private models)."
|
||||
},
|
||||
)
|
|
@@ -1,108 +0,0 @@
|
|||
from packaging import version
|
||||
import torch
|
||||
from torch import nn
|
||||
from typing import Any, Dict, List, Optional, Tuple, Union
|
||||
|
||||
from torch.utils.data.dataset import Dataset
|
||||
from transformers import Seq2SeqTrainer
|
||||
from .trainer import BaseTrainer
|
||||
|
||||
|
||||
if version.parse(torch.__version__) >= version.parse("1.6"):
|
||||
from torch.cuda.amp import autocast
|
||||
|
||||
|
||||
class Seq2SeqTrainer(Seq2SeqTrainer, BaseTrainer):
|
||||
def __init__(self, train_dataset_sizes=None, delta_args=None, *args, **kwargs):
|
||||
super().__init__(*args, **kwargs)
|
||||
self.train_dataset_sizes = train_dataset_sizes
|
||||
self.delta_args = delta_args
|
||||
|
||||
def evaluate(
|
||||
self,
|
||||
eval_dataset: Optional[Dict[str, Dataset]] = None,
|
||||
ignore_keys: Optional[List[str]] = None,
|
||||
metric_key_prefix: str = "eval",
|
||||
max_length: Optional[int] = None,
|
||||
num_beams: Optional[int] = None,
|
||||
) -> Dict[str, float]:
|
||||
# TODO: this also needs to be set per dataset
|
||||
self._max_length = max_length
|
||||
self._num_beams = num_beams
|
||||
return super().evaluate(eval_dataset, ignore_keys=ignore_keys, metric_key_prefix=metric_key_prefix)
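# The generation controls are only cached here; prediction_step below reads
# self._max_length / self._num_beams (falling back to the model config) when
# predict_with_generate is enabled.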
|
||||
|
||||
|
||||
def prediction_step(
|
||||
self,
|
||||
model: nn.Module,
|
||||
inputs: Dict[str, Union[torch.Tensor, Any]],
|
||||
prediction_loss_only: bool,
|
||||
ignore_keys: Optional[List[str]] = None,
|
||||
) -> Tuple[Optional[float], Optional[torch.Tensor], Optional[torch.Tensor]]:
|
||||
"""
|
||||
Perform an evaluation step on :obj:`model` using :obj:`inputs`.
|
||||
|
||||
Subclass and override to inject custom behavior.
|
||||
|
||||
Args:
|
||||
model (:obj:`nn.Module`):
|
||||
The model to evaluate.
|
||||
inputs (:obj:`Dict[str, Union[torch.Tensor, Any]]`):
|
||||
The inputs and targets of the model.
|
||||
|
||||
The dictionary will be unpacked before being fed to the model. Most models expect the targets under the
|
||||
argument :obj:`labels`. Check your model's documentation for all accepted arguments.
|
||||
prediction_loss_only (:obj:`bool`):
|
||||
Whether or not to return the loss only.
|
||||
|
||||
Return:
|
||||
Tuple[Optional[float], Optional[torch.Tensor], Optional[torch.Tensor]]: A tuple with the loss, logits and
|
||||
labels (each being optional).
|
||||
"""
|
||||
if not self.args.predict_with_generate or prediction_loss_only:
|
||||
return super().prediction_step(
|
||||
model, inputs, prediction_loss_only=prediction_loss_only, ignore_keys=ignore_keys
|
||||
)
|
||||
|
||||
has_labels = "labels" in inputs
|
||||
inputs = self._prepare_inputs(inputs)
|
||||
gen_kwargs = {
|
||||
"max_length": self._max_length if self._max_length is not None else self.model.config.max_length,
|
||||
"num_beams": self._num_beams if self._num_beams is not None else self.model.config.num_beams,
|
||||
}
|
||||
generated_tokens = self.model.generate(
|
||||
inputs["input_ids"],
|
||||
attention_mask=inputs["attention_mask"],
|
||||
**gen_kwargs,
|
||||
)
|
||||
# in case the batch is shorter than max length, the output should be padded
|
||||
if generated_tokens.shape[-1] < gen_kwargs["max_length"]:
|
||||
generated_tokens = self._pad_tensors_to_max_len(generated_tokens, gen_kwargs["max_length"])
|
||||
|
||||
with torch.no_grad():
|
||||
if self.use_amp:
|
||||
with autocast():
|
||||
outputs = model(**inputs)
|
||||
else:
|
||||
outputs = model(**inputs)
|
||||
if has_labels:
|
||||
if self.label_smoother is not None:
|
||||
loss = self.label_smoother(outputs, inputs["labels"]).mean().detach()
|
||||
else:
|
||||
loss = (outputs["loss"] if isinstance(outputs, dict) else outputs[0]).mean().detach()
|
||||
else:
|
||||
loss = None
|
||||
|
||||
if self.args.prediction_loss_only:
|
||||
return (loss, None, None)
|
||||
|
||||
labels = inputs["labels"]
|
||||
if labels.shape[-1] < gen_kwargs["max_length"]:
|
||||
labels = self._pad_tensors_to_max_len(labels, gen_kwargs["max_length"])
|
||||
|
||||
return (loss, generated_tokens, labels)
|
||||
|
||||
|
||||
|
||||
|
||||
|
|
@@ -1,274 +0,0 @@
|
|||
from typing import Dict, List, Optional
|
||||
import numpy as np
|
||||
import time
|
||||
import torch
|
||||
import collections
|
||||
from packaging import version
|
||||
from torch.utils.data.dataset import Dataset
|
||||
|
||||
from transformers import Trainer
|
||||
from transformers import logging
|
||||
from transformers.trainer_utils import (
|
||||
speed_metrics,
|
||||
EvalLoopOutput,
|
||||
denumpify_detensorize
|
||||
)
|
||||
from transformers.file_utils import is_torch_tpu_available
|
||||
from transformers.trainer_pt_utils import (
|
||||
find_batch_size,
|
||||
nested_numpify,
|
||||
nested_truncate,
|
||||
nested_concat,
|
||||
IterableDatasetShard
|
||||
)
|
||||
from .trainer_utils import EvalPrediction
|
||||
|
||||
|
||||
from torch.utils.data.dataloader import DataLoader
|
||||
from torch.utils.data.dataset import IterableDataset
|
||||
from transformers.deepspeed import deepspeed_init
|
||||
|
||||
|
||||
if version.parse(torch.__version__) >= version.parse("1.6"):
|
||||
from torch.cuda.amp import autocast
|
||||
|
||||
if is_torch_tpu_available():
|
||||
import torch_xla.core.xla_model as xm
|
||||
import torch_xla.debug.metrics as met
|
||||
import torch_xla.distributed.parallel_loader as pl
|
||||
|
||||
logger = logging.get_logger(__name__)
|
||||
|
||||
class BaseTrainer(Trainer):
|
||||
def __init__(self, evaluation_metrics=[], data_info=None, *args, **kwargs):
|
||||
"""When doing evaluation, it computes average of list of metrics
|
||||
given in evaluation_metrics and adds it to the dictionary of results.
|
||||
The Trainer class then uses this average metric to save the best model."""
|
||||
super().__init__(*args, **kwargs)
|
||||
self.evaluation_metrics = evaluation_metrics
|
||||
self.data_info = data_info
|
||||
|
||||
def get_data_info(self, metric_key_prefix):
|
||||
"""Returns the data information required to make the predictions/labels
|
||||
suitable for the evaluation."""
|
||||
if self.data_info is not None:
|
||||
return self.data_info[metric_key_prefix]
|
||||
return None
|
||||
|
||||
def evaluate(
|
||||
self,
|
||||
eval_dataset: Optional[Dataset] = None,
|
||||
ignore_keys: Optional[List[str]] = None,
|
||||
metric_key_prefix: str = "eval",
|
||||
) -> Dict[str, float]:
|
||||
"""
|
||||
Run evaluation and returns metrics.
|
||||
The calling script will be responsible for providing a method to compute metrics, as they are task-dependent
|
||||
(pass it to the init :obj:`compute_metrics` argument).
|
||||
You can also subclass and override this method to inject custom behavior.
|
||||
Args:
|
||||
eval_dataset (:obj:`Dataset`, `optional`):
|
||||
Pass a dataset if you wish to override :obj:`self.eval_dataset`. If it is an :obj:`datasets.Dataset`,
|
||||
columns not accepted by the ``model.forward()`` method are automatically removed. It must implement the
|
||||
:obj:`__len__` method.
|
||||
ignore_keys (:obj:`List[str]`, `optional`):
|
||||
A list of keys in the output of your model (if it is a dictionary) that should be ignored when
|
||||
gathering predictions.
|
||||
metric_key_prefix (:obj:`str`, `optional`, defaults to :obj:`"eval"`):
|
||||
An optional prefix to be used as the metrics key prefix. For example the metrics "bleu" will be named
|
||||
"eval_bleu" if the prefix is "eval" (default)
|
||||
Returns:
|
||||
A dictionary containing the evaluation loss and the potential metrics computed from the predictions. The
|
||||
dictionary also contains the epoch number which comes from the training state.
|
||||
"""
|
||||
# memory metrics - must set up as early as possible
|
||||
self._memory_tracker.start()
|
||||
eval_dataloader = self.get_eval_dataloader(eval_dataset)
|
||||
start_time = time.time()
|
||||
eval_loop = self.prediction_loop if self.args.use_legacy_prediction_loop else self.evaluation_loop
|
||||
output = eval_loop(
|
||||
eval_dataloader,
|
||||
description="Evaluation",
|
||||
# No point gathering the predictions if there are no metrics, otherwise we defer to
|
||||
# self.args.prediction_loss_only
|
||||
prediction_loss_only=True if self.compute_metrics is None else None,
|
||||
ignore_keys=ignore_keys,
|
||||
metric_key_prefix=metric_key_prefix,
|
||||
)
|
||||
output.metrics.update(speed_metrics(metric_key_prefix, start_time, output.num_samples))
|
||||
if len(self.evaluation_metrics) != 0:
|
||||
selected_metrics = [output.metrics[metric_key_prefix+"_"+k] for k in self.evaluation_metrics if metric_key_prefix+"_"+k in output.metrics]
|
||||
assert len(selected_metrics) >= 1, "at least one metric should be selected to compute the average_metrics."
|
||||
output.metrics.update({metric_key_prefix+'_average_metrics': np.mean(selected_metrics)})
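# Example (metric names are illustrative): with evaluation_metrics=["accuracy", "f1"]
# and metric_key_prefix="eval", this adds eval_average_metrics as the mean of
# eval_accuracy and eval_f1, which is then used for best-model selection as described
# in the class docstring above.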
|
||||
|
||||
self.log(output.metrics)
|
||||
|
||||
if self.args.tpu_metrics_debug or self.args.debug:
|
||||
# tpu-comment: Logging debug metrics for PyTorch/XLA (compile, execute times, ops, etc.)
|
||||
xm.master_print(met.metrics_report())
|
||||
|
||||
self.control = self.callback_handler.on_evaluate(self.args, self.state, self.control, output.metrics)
|
||||
self._memory_tracker.stop_and_update_metrics(output.metrics)
|
||||
return output.metrics
|
||||
|
||||
def evaluation_loop(
|
||||
self,
|
||||
dataloader: DataLoader,
|
||||
description: str,
|
||||
prediction_loss_only: Optional[bool] = None,
|
||||
ignore_keys: Optional[List[str]] = None,
|
||||
metric_key_prefix: str = "eval",
|
||||
) -> EvalLoopOutput:
|
||||
"""
|
||||
Prediction/evaluation loop, shared by :obj:`Trainer.evaluate()` and :obj:`Trainer.predict()`.
|
||||
|
||||
Works both with or without labels.
|
||||
"""
|
||||
prediction_loss_only = (
|
||||
prediction_loss_only if prediction_loss_only is not None else self.args.prediction_loss_only
|
||||
)
|
||||
|
||||
# if eval is called w/o train init deepspeed here
|
||||
if self.args.deepspeed and not self.deepspeed:
|
||||
|
||||
# XXX: eval doesn't have `resume_from_checkpoint` arg but we should be able to do eval
|
||||
# from the checkpoint eventually
|
||||
deepspeed_engine, _, _ = deepspeed_init(self, num_training_steps=0, resume_from_checkpoint=None)
|
||||
self.model = deepspeed_engine.module
|
||||
self.model_wrapped = deepspeed_engine
|
||||
self.deepspeed = deepspeed_engine
|
||||
# XXX: we don't need optim/sched for inference, but this needs to be sorted out, since
|
||||
# for example the Z3-optimizer is a must for zero3 to work even for inference - what we
|
||||
# don't need is the deepspeed basic optimizer which is self.optimizer.optimizer
|
||||
deepspeed_engine.optimizer.optimizer = None
|
||||
deepspeed_engine.lr_scheduler = None
|
||||
|
||||
model = self._wrap_model(self.model, training=False)
|
||||
|
||||
# if full fp16 is wanted on eval and this ``evaluation`` or ``predict`` isn't called while
|
||||
# ``train`` is running, halve it first and then put on device
|
||||
if not self.is_in_train and self.args.fp16_full_eval:
|
||||
model = model.half().to(self.args.device)
|
||||
|
||||
batch_size = dataloader.batch_size
|
||||
|
||||
logger.info(f"***** Running {description} *****")
|
||||
if isinstance(dataloader.dataset, collections.abc.Sized):
|
||||
logger.info(f" Num examples = {self.num_examples(dataloader)}")
|
||||
else:
|
||||
logger.info(" Num examples: Unknown")
|
||||
logger.info(f" Batch size = {batch_size}")
|
||||
|
||||
model.eval()
|
||||
|
||||
self.callback_handler.eval_dataloader = dataloader
|
||||
# Do this before wrapping.
|
||||
eval_dataset = dataloader.dataset
|
||||
|
||||
if is_torch_tpu_available():
|
||||
dataloader = pl.ParallelLoader(dataloader, [self.args.device]).per_device_loader(self.args.device)
|
||||
|
||||
if self.args.past_index >= 0:
|
||||
self._past = None
|
||||
|
||||
# Initialize containers
|
||||
# losses/preds/labels on GPU/TPU (accumulated for eval_accumulation_steps)
|
||||
losses_host = None
|
||||
preds_host = None
|
||||
labels_host = None
|
||||
# losses/preds/labels on CPU (final containers)
|
||||
all_losses = None
|
||||
all_preds = None
|
||||
all_labels = None
|
||||
# Will be useful when we have an iterable dataset whose length we don't know.
|
||||
|
||||
observed_num_examples = 0
|
||||
# Main evaluation loop
|
||||
for step, inputs in enumerate(dataloader):
|
||||
# Update the observed num examples
|
||||
observed_batch_size = find_batch_size(inputs)
|
||||
if observed_batch_size is not None:
|
||||
observed_num_examples += observed_batch_size
|
||||
|
||||
# Prediction step
|
||||
loss, logits, labels = self.prediction_step(model, inputs, prediction_loss_only, ignore_keys=ignore_keys)
|
||||
# Update containers on host
|
||||
if loss is not None:
|
||||
losses = self._nested_gather(loss.repeat(batch_size))
|
||||
losses_host = losses if losses_host is None else torch.cat((losses_host, losses), dim=0)
|
||||
if logits is not None:
|
||||
logits = self._pad_across_processes(logits)
|
||||
logits = self._nested_gather(logits)
|
||||
preds_host = logits if preds_host is None else nested_concat(preds_host, logits, padding_index=-100)
|
||||
if labels is not None:
|
||||
labels = self._pad_across_processes(labels)
|
||||
labels = self._nested_gather(labels)
|
||||
labels_host = labels if labels_host is None else nested_concat(labels_host, labels, padding_index=-100)
|
||||
self.control = self.callback_handler.on_prediction_step(self.args, self.state, self.control)
|
||||
|
||||
# Gather all tensors and put them back on the CPU if we have done enough accumulation steps.
|
||||
if self.args.eval_accumulation_steps is not None and (step + 1) % self.args.eval_accumulation_steps == 0:
|
||||
if losses_host is not None:
|
||||
losses = nested_numpify(losses_host)
|
||||
all_losses = losses if all_losses is None else np.concatenate((all_losses, losses), axis=0)
|
||||
if preds_host is not None:
|
||||
logits = nested_numpify(preds_host)
|
||||
all_preds = logits if all_preds is None else nested_concat(all_preds, logits, padding_index=-100)
|
||||
if labels_host is not None:
|
||||
labels = nested_numpify(labels_host)
|
||||
all_labels = (
|
||||
labels if all_labels is None else nested_concat(all_labels, labels, padding_index=-100)
|
||||
)
|
||||
|
||||
# Set back to None to begin a new accumulation
|
||||
losses_host, preds_host, labels_host = None, None, None
|
||||
|
||||
if self.args.past_index and hasattr(self, "_past"):
|
||||
# Clean the state at the end of the evaluation loop
|
||||
delattr(self, "_past")
|
||||
|
||||
# Gather all remaining tensors and put them back on the CPU
|
||||
if losses_host is not None:
|
||||
losses = nested_numpify(losses_host)
|
||||
all_losses = losses if all_losses is None else np.concatenate((all_losses, losses), axis=0)
|
||||
if preds_host is not None:
|
||||
logits = nested_numpify(preds_host)
|
||||
all_preds = logits if all_preds is None else nested_concat(all_preds, logits, padding_index=-100)
|
||||
if labels_host is not None:
|
||||
labels = nested_numpify(labels_host)
|
||||
all_labels = labels if all_labels is None else nested_concat(all_labels, labels, padding_index=-100)
|
||||
|
||||
# Number of samples
|
||||
if not isinstance(eval_dataset, IterableDataset):
|
||||
num_samples = len(eval_dataset)
|
||||
elif isinstance(eval_dataset, IterableDatasetShard):
|
||||
num_samples = eval_dataset.num_examples
|
||||
else:
|
||||
num_samples = observed_num_examples
|
||||
|
||||
# Number of losses has been rounded to a multiple of batch_size and in a distributed training, the number of
|
||||
# samples has been rounded to a multiple of batch_size, so we truncate.
|
||||
if all_losses is not None:
|
||||
all_losses = all_losses[:num_samples]
|
||||
if all_preds is not None:
|
||||
all_preds = nested_truncate(all_preds, num_samples)
|
||||
if all_labels is not None:
|
||||
all_labels = nested_truncate(all_labels, num_samples)
|
||||
# Metrics!
|
||||
if self.compute_metrics is not None and all_preds is not None and all_labels is not None:
|
||||
metrics = self.compute_metrics(EvalPrediction(predictions=all_preds, label_ids=all_labels,
|
||||
data_info=self.get_data_info(metric_key_prefix)))
|
||||
else:
|
||||
metrics = {}
|
||||
|
||||
# To be JSON-serializable, we need to remove numpy types or zero-d tensors
|
||||
metrics = denumpify_detensorize(metrics)
|
||||
|
||||
if all_losses is not None:
|
||||
metrics[f"{metric_key_prefix}_loss"] = all_losses.mean().item()
|
||||
|
||||
# Prefix all keys with metric_key_prefix + '_'
|
||||
for key in list(metrics.keys()):
|
||||
if not key.startswith(f"{metric_key_prefix}_"):
|
||||
metrics[f"{metric_key_prefix}_{key}"] = metrics.pop(key)
|
||||
return EvalLoopOutput(predictions=all_preds, label_ids=all_labels, metrics=metrics, num_samples=num_samples)
|
|
@@ -1,75 +0,0 @@
|
|||
import numpy as np
|
||||
from typing import Union, NamedTuple, Tuple, Dict, Any
|
||||
import os
|
||||
import regex as re
|
||||
import logging
|
||||
from dataclasses import fields
|
||||
import torch.nn as nn
|
||||
import json
|
||||
|
||||
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
logger.setLevel(logging.INFO)
|
||||
|
||||
class EvalPrediction(NamedTuple):
|
||||
"""
|
||||
Evaluation output (always contains labels), to be used to compute metrics.
|
||||
Parameters:
|
||||
predictions (:obj:`np.ndarray`): Predictions of the model.
|
||||
label_ids (:obj:`np.ndarray`): Targets to be matched.
|
||||
data_info: (:obj:`Dict[str, Any]`): Extra dataset information required
|
||||
to perform the evaluation. The data_info is a dictionary with keys from
|
||||
train, eval, test to specify the data_info for each split of the dataset.
|
||||
"""
|
||||
predictions: Union[np.ndarray, Tuple[np.ndarray]]
|
||||
label_ids: np.ndarray
|
||||
data_info: Dict[str, Any]
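# Minimal construction sketch (values are illustrative only):
#   EvalPrediction(predictions=np.array([[0.1, 0.9]]),
#                  label_ids=np.array([1]),
#                  data_info={"eval": {}})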
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
def create_dir(output_dir):
|
||||
"""
|
||||
Checks whether the output_dir already exists and creates it if not.
|
||||
Args:
|
||||
output_dir: path to the output_dir
|
||||
"""
|
||||
if not os.path.exists(output_dir):
|
||||
os.makedirs(output_dir)
|
||||
|
||||
|
||||
def get_last_checkpoint(output_dir):
|
||||
if os.path.exists(os.path.join(output_dir, 'pytorch_model.bin')):
|
||||
return output_dir
|
||||
return None
|
||||
|
||||
|
||||
def pad_punctuation(text):
|
||||
"""Re-implementation of _pad_punctuation in t5. This function adds spaces
|
||||
around punctuation. While this pads punctuation as expected, it has the
|
||||
unexpected effect of padding certain unicode characters with accents, with
|
||||
spaces as well. For instance: "François" becomes "Fran ç ois"."""
|
||||
# Pad everything except for: underscores (_), whitespace (\s),
|
||||
# numbers (\p{N}), letters (\p{L}) and accent characters (\p{M}).
|
||||
text = re.sub(r'([^_\s\p{N}\p{L}\p{M}])', r' \1 ', text)
|
||||
# Collapse consecutive whitespace into one space.
|
||||
text = re.sub(r'\s+', ' ', text)
|
||||
return text
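# Rough usage sketch: pad_punctuation("Hello, world!") yields "Hello , world ! "
# (punctuation spaced out, runs of whitespace collapsed; a trailing space may remain).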
|
||||
|
||||
def save_json(filepath, dictionary):
|
||||
with open(filepath, "w") as outfile:
|
||||
json.dump(dictionary, outfile)
|
||||
|
||||
|
||||
def read_json(filepath):
|
||||
f = open(filepath)
|
||||
return json.load(f)
|
||||
|
||||
|
||||
def save_training_config(config_file, output_dir):
|
||||
json_data = read_json(config_file)
|
||||
save_json(os.path.join(output_dir, "training_config.json"), json_data)
|
||||
|
|
@@ -0,0 +1 @@
|
|||
from .utils import *
|
|
@@ -1,10 +1,51 @@
|
|||
from dataclasses import dataclass, field
|
||||
from typing import Optional, List
|
||||
from transformers import Seq2SeqTrainingArguments
|
||||
from transformers import HfArgumentParser
|
||||
|
||||
@dataclass
|
||||
class ModelArguments:
|
||||
"""
|
||||
Arguments pertaining to which model/config/tokenizer we are going to fine-tune from.
|
||||
"""
|
||||
model_name_or_path: str = field(
|
||||
metadata={"help": "Path to pretrained model or model identifier from huggingface.co/models"}
|
||||
)
|
||||
config_name: Optional[str] = field(
|
||||
default=None, metadata={"help": "Pretrained config name or path if not the same as model_name"}
|
||||
)
|
||||
tokenizer_name: Optional[str] = field(
|
||||
default=None, metadata={"help": "Pretrained tokenizer name or path if not the same as model_name"}
|
||||
)
|
||||
cache_dir: Optional[str] = field(
|
||||
default=None,
|
||||
metadata={"help": "Where to store the pretrained models downloaded from huggingface.co"},
|
||||
)
|
||||
use_fast_tokenizer: bool = field(
|
||||
default=True,
|
||||
metadata={"help": "Whether to use one of the fast tokenizer (backed by the tokenizers library) or not."},
|
||||
)
|
||||
model_revision: str = field(
|
||||
default="main",
|
||||
metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."},
|
||||
)
|
||||
use_auth_token: bool = field(
|
||||
default=False,
|
||||
metadata={
|
||||
"help": "Will use the token generated when running `transformers-cli login` (necessary to use this script "
|
||||
"with private models)."
|
||||
},
|
||||
)
|
||||
num_classes: Optional[int] = field(
|
||||
default=None, metadata={"help": "The number of classes, used to initialize classification models"}
|
||||
)
|
||||
|
||||
|
||||
|
||||
from transformers import TrainingArguments as HfTrainingArguments
|
||||
# run_seq2seq parameters.
|
||||
|
||||
@dataclass
|
||||
class TrainingArguments(Seq2SeqTrainingArguments):
|
||||
class TrainingArguments(HfTrainingArguments):
|
||||
print_num_parameters: Optional[bool] = field(default=False, metadata={"help": "If set, print the parameters of "
|
||||
"the model."})
|
||||
do_test: Optional[bool] = field(default=False, metadata={"help": "If set, evaluates the test performance."})
|
||||
|
@@ -16,9 +57,31 @@ class TrainingArguments(Seq2SeqTrainingArguments):
|
|||
"than 10K samples datasets), or by using 1K examples"
|
||||
"from training set as validation set (for larger"
|
||||
" datasets)."})
|
||||
compute_time: Optional[bool] = field(default=False, metadata={"help": "If set measures the time."})
|
||||
compute_memory: Optional[bool] = field(default=False, metadata={"help": "if set, measures the memory"})
|
||||
compute_time: Optional[bool] = field(default=True, metadata={"help": "If set measures the time."})
|
||||
compute_memory: Optional[bool] = field(default=True, metadata={"help": "if set, measures the memory"})
|
||||
is_seq2seq: Optional[bool] = field(default=True, metadata={"help": "whether the pipeline is a seq2seq one"})
|
||||
sortish_sampler: bool = field(default=False, metadata={"help": "Whether to use SortishSampler or not."})
|
||||
predict_with_generate: bool = field(
|
||||
default=False, metadata={"help": "Whether to use generate to calculate generative metrics (ROUGE, BLEU)."}
|
||||
)
|
||||
generation_max_length: Optional[int] = field(
|
||||
default=None,
|
||||
metadata={
|
||||
"help": "The `max_length` to use on each evaluation loop when `predict_with_generate=True`. Will default "
|
||||
"to the `max_length` value of the model configuration."
|
||||
},
|
||||
)
|
||||
generation_num_beams: Optional[int] = field(
|
||||
default=None,
|
||||
metadata={
|
||||
"help": "The `num_beams` to use on each evaluation loop when `predict_with_generate=True`. Will default "
|
||||
"to the `num_beams` value of the model configuration."
|
||||
},
|
||||
)
|
||||
remove_unused_columns: Optional[bool] = field(
|
||||
default=False, metadata={"help": "Remove columns not required by the model when using an nlp.Dataset."}
|
||||
)
|
||||
|
||||
|
||||
|
||||
|
||||
|
@@ -48,7 +111,7 @@ class DataTrainingArguments:
|
|||
default=None, metadata={"help": "The configuration name of the test dataset to use (via the datasets library)."}
|
||||
)
|
||||
overwrite_cache: bool = field(
|
||||
default=False, metadata={"help": "Overwrite the cached training and evaluation sets"}
|
||||
default=True, metadata={"help": "Overwrite the cached training and evaluation sets"}
|
||||
)
|
||||
preprocessing_num_workers: Optional[int] = field(
|
||||
default=None,
|
||||
|
@@ -134,7 +197,6 @@ class DataTrainingArguments:
|
|||
datasets_saved_path: Optional[str] = field(
|
||||
default=None, metadata={"help": "the path of the saved datasets"}
|
||||
)
|
||||
|
||||
data_seed: Optional[int] = field(default=42, metadata={"help": "seed used to shuffle the data."})
|
||||
|
||||
|
||||
|
@@ -147,3 +209,30 @@ class DataTrainingArguments:
|
|||
self.val_max_target_length = self.max_target_length
|
||||
if self.test_max_target_length is None:
|
||||
self.test_max_target_length = self.max_target_length
|
||||
|
||||
|
||||
class RemainArgHfArgumentParser(HfArgumentParser):
|
||||
def parse_json_file(self, json_file: str, return_remaining_args=True ):
|
||||
"""
|
||||
Alternative helper method that does not use `argparse` at all, instead loading a json file and populating the
|
||||
dataclass types.
|
||||
"""
|
||||
import argparse
|
||||
import json
|
||||
from pathlib import Path
|
||||
import dataclasses
|
||||
|
||||
data = json.loads(Path(json_file).read_text())
|
||||
outputs = []
|
||||
for dtype in self.dataclass_types:
|
||||
keys = {f.name for f in dataclasses.fields(dtype) if f.init}
|
||||
inputs = {k: data.pop(k) for k in list(data.keys()) if k in keys}
|
||||
obj = dtype(**inputs)
|
||||
outputs.append(obj)
|
||||
|
||||
remain_args = argparse.ArgumentParser()
|
||||
remain_args.__dict__.update(data)
|
||||
if return_remaining_args:
|
||||
return (*outputs, remain_args)
|
||||
else:
|
||||
return (*outputs,)
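# Usage note: the running script earlier in this diff unpacks the returned tuple as
# (model_args, data_args, training_args, remaining), so any JSON keys not consumed by
# the three dataclasses (e.g. the delta-tuning fields) end up on the trailing namespace object.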
|
|
@@ -1,15 +1,48 @@
|
|||
import os
|
||||
import regex as re
|
||||
import logging
|
||||
from dataclasses import fields
|
||||
import torch.nn as nn
|
||||
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
logger.setLevel(logging.INFO)
|
||||
|
||||
|
||||
# class EvalPrediction(NamedTuple):
|
||||
# """
|
||||
# Evaluation output (always contains labels), to be used to compute metrics.
|
||||
# Parameters:
|
||||
# predictions (:obj:`np.ndarray`): Predictions of the model.
|
||||
# label_ids (:obj:`np.ndarray`): Targets to be matched.
|
||||
# data_info: (:obj:`Dict[str, Any]`): Extra dataset information, one requires
|
||||
# to performs the evaluation. The data_info is a dictionary with keys from
|
||||
# train, eval, test to specify the data_info for each split of the dataset.
|
||||
# """
|
||||
# predictions: Union[np.ndarray, Tuple[np.ndarray]]
|
||||
# label_ids: np.ndarray
|
||||
# data_info: Dict[str, Any]
|
||||
|
||||
def create_dir(output_dir):
|
||||
"""
|
||||
Checks whether the output_dir already exists and creates it if not.
|
||||
Args:
|
||||
output_dir: path to the output_dir
|
||||
"""
|
||||
if not os.path.exists(output_dir):
|
||||
os.makedirs(output_dir)
|
||||
|
||||
|
||||
def get_last_checkpoint(output_dir):
|
||||
if os.path.exists(os.path.join(output_dir, 'pytorch_model.bin')):
|
||||
return output_dir
|
||||
return None
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
def save_json(filepath, dictionary):
|
||||
with open(filepath, "w") as outfile:
|
||||
json.dump(dictionary, outfile)
|
||||
|
||||
|
||||
def read_json(filepath):
|
||||
f = open(filepath)
|
||||
return json.load(f)
|
|
@@ -66,7 +66,7 @@ class SoftPromptLayer(nn.Module):
|
|||
assert self.num_tokens>0
|
||||
self.instantiate(raw_embedding(torch.tensor([0])).shape[-1])
|
||||
|
||||
self.all_pseudo_tokens = {}
|
||||
# self.all_pseudo_tokens = {}
|
||||
|
||||
def pre_forward(self, *args, **kwargs):
|
||||
# if attention_mask is passed as PLM's input, modify it here
|
||||
|
@@ -108,15 +108,15 @@ class SoftPromptLayer(nn.Module):
|
|||
for expand_key in self.other_expand_ids:
|
||||
if expand_key in kwargs:
|
||||
real_tokens = kwargs[expand_key]
|
||||
if expand_key in self.all_pseudo_tokens:
|
||||
pseudo_tokens = self.all_pseudo_tokens[expand_key].to(real_tokens.device)
|
||||
else:
|
||||
pseudo_tokens_value = self.other_expand_ids[expand_key]
|
||||
pseudo_tokens = torch.ones(
|
||||
(*real_tokens.shape[:-1], inputs_embeds.shape[-2]-real_tokens.shape[-1]),
|
||||
dtype = real_tokens.dtype,
|
||||
device=real_tokens.device) * pseudo_tokens_value
|
||||
self.all_pseudo_tokens[expand_key] = pseudo_tokens
|
||||
# if expand_key in self.all_pseudo_tokens:
|
||||
# pseudo_tokens = self.all_pseudo_tokens[expand_key].to(real_tokens.device)
|
||||
# else:
|
||||
pseudo_tokens_value = self.other_expand_ids[expand_key]
|
||||
pseudo_tokens = torch.ones(
|
||||
(*real_tokens.shape[:-1], inputs_embeds.shape[-2]-real_tokens.shape[-1]),
|
||||
dtype = real_tokens.dtype,
|
||||
device=real_tokens.device) * pseudo_tokens_value
|
||||
# self.all_pseudo_tokens[expand_key] = pseudo_tokens
|
||||
real_tokens.data = torch.cat([pseudo_tokens, real_tokens], dim=-1)
|
||||
|
||||
return args, kwargs
|
||||
|
|