Merge branch 'main' into main
This commit is contained in:
commit
a14c19e8dc
|
@ -19,7 +19,20 @@ log.txt
|
|||
**/DeltaHub/
|
||||
*beans/
|
||||
**/examples/*/configs/
|
||||
|
||||
!examples/*/configs/config_gen.py
|
||||
**/jupyter_notebook_examples/
|
||||
!examples/jupyter_notebook_examples/*.py
|
||||
|
||||
|
||||
!**/examples/*/configs/config_gen.py
|
||||
**/outputs_search/**/*.bin
|
||||
**/outputs_search/**/*.pt
|
||||
*.db
|
||||
**/nohup.out
|
||||
**/examples/examples_bmtrain/BigModels/down_data
|
||||
**/examples/examples_bmtrain/BMTrain_stable
|
||||
**/examples/examples_bmtrain/BMPretrain
|
||||
**/examples/examples_bmtrain/BigModels/BigModels/results
|
||||
**/Delta_Memory/
|
||||
|
||||
|
|
|
@ -0,0 +1 @@
|
|||
Subproject commit 058e5f25c898a1f956e3f17a0db6d62f08173e7f
|
|
@ -0,0 +1 @@
|
|||
Subproject commit 3a5083d61e73bae607574a3047deafaa76b97646
|
|
@ -0,0 +1,64 @@
|
|||
# Applying OpenDelta to GLUE/SuperGLUE tasks using Seq2Seq Paradigm
|
||||
|
||||
|
||||
## Install the repo
|
||||
```bash
|
||||
cd ../
|
||||
python setup_seq2seq.py develop
|
||||
```
|
||||
This will add `examples_seq2seq` to the environment path of the python lib.
|
||||
|
||||
## Generating the json configuration file
|
||||
|
||||
```
|
||||
python config_gen.py --job $job_name
|
||||
|
||||
```
|
||||
The available job configuration (e.g., `--job lora_t5-base`) can be seen from `config_gen.py`. You can also
|
||||
create your own configuration.
|
||||
|
||||
|
||||
## Run the code
|
||||
|
||||
```
|
||||
python run_seq2seq.py configs/$job_name/$dataset.json
|
||||
```
|
||||
|
||||
## Possible Errors
|
||||
|
||||
1.
|
||||
```
|
||||
ValueError: You must login to the Hugging Face hub on this computer by typing `transformers-cli login` and entering your credentials to use `use_auth_token=Tr
|
||||
ue`. Alternatively, you can pass your own token as the `use_auth_token` argument.
|
||||
```
|
||||
- Solution 1: Please register an account on [HuggingFace](https://huggingface.co/)
|
||||
Then run transformers-cli login on your command line to enter the username and password.
|
||||
|
||||
- Solution 2: Disable push_to_hub by modifying in the config.json : "push_to_hub": False
|
||||
|
||||
2.
|
||||
```
|
||||
OSError: Looks like you do not have git-lfs installed, please install. You can install from https://git-lfs.github.com/. Then run `git lfs install` (you only have to do this once).
|
||||
```
|
||||
|
||||
- Solution 1:
|
||||
```
|
||||
wget -P ~ https://github.com/git-lfs/git-lfs/releases/download/v3.0.2/git-lfs-linux-amd64-v3.0.2.tar.gz
|
||||
cd ~
|
||||
tar -xvzf git-lfs-linux-amd64-v3.0.2.tar.gz
|
||||
export PATH=~:$PATH
|
||||
git-lfs install
|
||||
```
|
||||
|
||||
- Solution 2: Disable push_to_hub by modifying in the config.json : "push_to_hub": False
|
||||
|
||||
|
||||
3. dataset connection error
|
||||
|
||||
Solution 1: open a Python console and run the failing command again; this may not always help.
|
||||
|
||||
Solution 2: download the dataset yourself on an internet-connected machine, save it to disk, transfer it to your server, and finally load it with `load_from_disk`.
|
||||
|
||||
|
||||
## Link to the original training scripts
|
||||
This example repo is based on the [compacter training scripts](https://github.com/rabeehk/compacter), with compacter-related lines removed. Thanks to the authors of the original repo. In addition, in private correspondence with the authors, they shared the codes to create the json configs. Thanks again for their efforts.
|
|
@ -0,0 +1,59 @@
|
|||
# the final results will be populated here.{
|
||||
"evaluate": {
|
||||
"epoch": 20.0,
|
||||
"eval_accuracy": 89.2156862745098,
|
||||
"eval_average_metrics": 90.76168929110105,
|
||||
"eval_f1": 92.3076923076923,
|
||||
"eval_loss": 0.16493959724903107,
|
||||
"eval_runtime": 1.6391,
|
||||
"eval_samples_per_second": 124.455
|
||||
},
|
||||
"repo_name": "DeltaHub/bitfit_t5-base_mrpc",
|
||||
"test": {
|
||||
"epoch": 20.0,
|
||||
"test_accuracy": 88.23529411764706,
|
||||
"test_average_metrics": 89.97971602434077,
|
||||
"test_f1": 91.72413793103448,
|
||||
"test_loss": 0.14968213438987732,
|
||||
"test_runtime": 1.6344,
|
||||
"test_samples_per_second": 124.82
|
||||
}
|
||||
}
|
||||
{
|
||||
"evaluate": {
|
||||
"epoch": 20.0,
|
||||
"eval_average_metrics": 52.10265668831534,
|
||||
"eval_loss": 0.3603779077529907,
|
||||
"eval_matthews_correlation": 52.10265668831534,
|
||||
"eval_runtime": 1.0808,
|
||||
"eval_samples_per_second": 482.046
|
||||
},
|
||||
"repo_name": "DeltaHub/bitfit_t5-base_cola",
|
||||
"test": {
|
||||
"epoch": 20.0,
|
||||
"test_average_metrics": 54.209563471221934,
|
||||
"test_loss": 0.2853100299835205,
|
||||
"test_matthews_correlation": 54.209563471221934,
|
||||
"test_runtime": 1.056,
|
||||
"test_samples_per_second": 494.304
|
||||
}
|
||||
}
|
||||
{
|
||||
"evaluate": {
|
||||
"epoch": 20.0,
|
||||
"eval_average_metrics": 53.80613287067274,
|
||||
"eval_loss": 0.25723716616630554,
|
||||
"eval_matthews_correlation": 53.80613287067274,
|
||||
"eval_runtime": 1.0583,
|
||||
"eval_samples_per_second": 492.299
|
||||
},
|
||||
"repo_name": "DeltaHub/bitfit_t5-base_cola",
|
||||
"test": {
|
||||
"epoch": 20.0,
|
||||
"test_average_metrics": 54.32497579543861,
|
||||
"test_loss": 0.22327613830566406,
|
||||
"test_matthews_correlation": 54.32497579543861,
|
||||
"test_runtime": 1.0556,
|
||||
"test_samples_per_second": 494.507
|
||||
}
|
||||
}
|
|
@ -0,0 +1,3 @@
|
|||
from .tasks import TASK_MAPPING, AutoTask
|
||||
from .data_collator import TaskDataCollatorForSeq2Seq
|
||||
from .postprocessors import AutoPostProcessor
|
|
@ -0,0 +1,16 @@
|
|||
import numpy as np
|
||||
from dataclasses import dataclass
|
||||
from transformers import DataCollatorForSeq2Seq
|
||||
|
||||
|
||||
@dataclass
class TaskDataCollatorForSeq2Seq(DataCollatorForSeq2Seq):
    """Seq2seq collator with an (currently disabled) per-batch task-consistency check."""

    def check_uniqueness(self, samples):
        # Every example in a batch must belong to one and the same task.
        assert len(np.unique(samples)) == 1

    def __call__(self, features):
        # tasks = [d.pop('task') for d in features]
        # self.check_uniqueness(tasks)
        batch = super().__call__(features)
        # batch["task"] = tasks[0]
        return batch
|
|
@ -0,0 +1,67 @@
|
|||
import abc
|
||||
from collections import OrderedDict
|
||||
import numpy as np
|
||||
|
||||
"""Defines functions to process the outputs to make them ready for the evaluation."""
|
||||
|
||||
def string_to_float(string, default=-1., **unused_kwargs):
    """Convert *string* to float, using *default* when conversion is not possible.

    Besides malformed numeric strings (ValueError), this also tolerates
    non-string inputs such as None, for which `float` raises TypeError,
    so metric code never crashes on a malformed prediction.
    """
    try:
        return float(string)
    except (ValueError, TypeError):
        return default
|
||||
|
||||
|
||||
class PostProcessor(abc.ABC):
    """Turn raw model predictions and labels into cleaned-up text pairs
    ready for metric evaluation."""

    def __init__(self, tokenizer, ignore_pad_token_for_loss):
        self.tokenizer = tokenizer
        self.ignore_pad_token_for_loss = ignore_pad_token_for_loss

    def process(self, preds, labels, data_info=None):
        """Decode token ids and return (decoded_preds, decoded_labels) as stripped strings."""
        if isinstance(preds, tuple):
            preds = preds[0]
        if self.ignore_pad_token_for_loss:
            # -100 marks positions ignored by the loss; swap in the pad id so
            # they decode cleanly.
            labels = np.where(labels != -100, labels, self.tokenizer.pad_token_id)
        decoded_preds = [text.strip() for text in
                         self.tokenizer.batch_decode(preds, skip_special_tokens=True)]
        decoded_labels = [text.strip() for text in
                          self.tokenizer.batch_decode(labels, skip_special_tokens=True)]
        return decoded_preds, decoded_labels
|
||||
|
||||
|
||||
class MultiRC(PostProcessor):
    """Attach MultiRC question-group ids so grouped metrics can be computed."""

    def process(self, preds, labels, data_info):
        decoded_preds, decoded_labels = super().process(preds, labels, data_info)
        grouped_preds = [{"group": info["group"], "value": pred}
                         for info, pred in zip(data_info, decoded_preds)]
        grouped_labels = [{"group": info["group"], "value": label}
                          for info, label in zip(data_info, decoded_labels)]
        return grouped_preds, grouped_labels
|
||||
|
||||
class Record(PostProcessor):
    """For ReCoRD, labels are replaced by the full answer lists from data_info,
    since any listed answer counts as correct."""

    def process(self, preds, labels, data_info):
        decoded_preds, _ = super().process(preds, labels, data_info)
        gold_answers = [info["answers"] for info in data_info]
        return decoded_preds, gold_answers
|
||||
|
||||
|
||||
# Task name -> specialized postprocessor; anything not listed falls back to
# the generic PostProcessor.
POSTPROCESSOR_MAPPING = OrderedDict(
    [
        ('superglue-record', Record),
        ('superglue-multirc', MultiRC)
    ]
)

class AutoPostProcessor:
    """Factory selecting the postprocessor appropriate for a task name."""
    @classmethod
    def get(cls, task, tokenizer, ignore_pad_token_for_loss):
        """Return a task-specific postprocessor, defaulting to PostProcessor.

        Fix: the classmethod's first parameter was named `self`; renamed to
        `cls` per convention (callers are unaffected — they pass only
        task/tokenizer/flag).
        """
        if task in POSTPROCESSOR_MAPPING:
            return POSTPROCESSOR_MAPPING[task](tokenizer, ignore_pad_token_for_loss)
        return PostProcessor(tokenizer, ignore_pad_token_for_loss)
|
||||
|
||||
|
|
@ -0,0 +1,152 @@
|
|||
|
||||
|
||||
class BasePrompt(object):
    """Render a raw example into (source_texts, target_texts) using a template.

    Subclasses must define the class attributes `textual_templates`,
    `generation_verbalizers`, and `mlmhead_verbalizers`.
    """
    def __init__(self, template_id=0, verbalizer_id=0, generation=True):
        # Select the template / verbalizer declared on the subclass.
        self.template = self.textual_templates[template_id]
        if generation:
            self.verbalizer = self.generation_verbalizers[verbalizer_id]
        else:
            # NOTE(review): most subclasses store mlmhead_verbalizers as a dict,
            # so integer indexing here would raise KeyError — confirm whether the
            # non-generation path is actually exercised.
            self.verbalizer = self.mlmhead_verbalizers[verbalizer_id]


    def __call__(self, example):
        """Return (src_texts, tgt_texts): lists of strings for one example."""
        def eval_syntax(syntaxlist, example):
            # Template pieces prefixed with "[_eval_]" are evaluated as Python
            # expressions with `example` in scope; all other pieces are literals.
            # SECURITY: eval() executes arbitrary code — templates must never
            # come from untrusted input.
            composed = []
            for x in syntaxlist:
                if x.startswith("[_eval_]"):
                    t = eval(x[len("[_eval_]"):])
                else:
                    t = x
                composed.append(t)
            return composed
        src_texts = eval_syntax(self.template,example)

        # Map the gold label (stringified) to its verbalization.
        tgt_texts = self.verbalizer[str(example['label'])]
        if isinstance(tgt_texts, list):
            # A list-valued verbalization may itself contain [_eval_] pieces.
            tgt_texts = eval_syntax(tgt_texts, example)
        else:
            tgt_texts = [tgt_texts]
        return src_texts, tgt_texts
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
class MRPCPrompt(BasePrompt):
    """Prompt for GLUE MRPC paraphrase detection."""
    generation_verbalizers = [
        {
            "0": "different",
            "1": "same"
        },
        {
            "0": "not_equivalent",
            "1": "equivalent"
        }
    ]
    # NOTE(review): stored as a dict, but BasePrompt indexes mlmhead_verbalizers
    # with an integer verbalizer_id — would raise KeyError; confirm.
    mlmhead_verbalizers = {
        "0": "different",
        "1": "same"
    }
    # NOTE(review): "different of same" looks like a typo for "different or
    # same"; left unchanged because the string is part of the prompt fed to
    # the model (changing it would change model inputs).
    textual_templates = [
        ["sentence1:", """[_eval_]example['sentence1']""",
         "sentence2:", """[_eval_]example["sentence2"]""", "Meanings different of same? Answer: " ]
    ]
|
||||
|
||||
# NOTE(review): this class is a byte-for-byte copy of MRPCPrompt (MRPC fields
# and verbalizers) and is immediately shadowed by the second BoolQPrompt
# definition below — it is dead code, most likely a copy-paste leftover;
# consider deleting it.
class BoolQPrompt(BasePrompt):
    generation_verbalizers = [
        {
            "0": "different",
            "1": "same"
        },
        {
            "0": "not_equivalent",
            "1": "equivalent"
        }
    ]
    mlmhead_verbalizers = {
        "0": "different",
        "1": "same"
    }
    textual_templates = [
        ["sentence1:", """[_eval_]example['sentence1']""",
         "sentence2:", """[_eval_]example["sentence2"]""", "Meanings different of same? Answer: " ]
    ]
|
||||
|
||||
class BoolQPrompt(BasePrompt):
    """Prompt for SuperGLUE BoolQ: label 0/1 verbalized as no/yes."""
    generation_verbalizers = [
        {
            "0": "no",
            "1": "yes"
        },
    ]
    mlmhead_verbalizers = {
        "0": "no",
        "1": "yes"
    }
    # NOTE(review): this template reads example['hypothesis'] / example['premise'],
    # but the BoolQ task elsewhere in this repo uses 'question' / 'passage'
    # fields — this would raise KeyError on real BoolQ examples; confirm.
    textual_templates = [
        ["hypothesis:", """[_eval_]example['hypothesis']""",
         "premise:", """[_eval_]example["premise"]""", "The answer was " ]
    ]
|
||||
|
||||
class COLAPrompt(BasePrompt):
    """Prompt for GLUE CoLA grammatical-acceptability: label 0/1 -> No/Yes."""
    generation_verbalizers = [
        {
            "0": "No",
            "1": "Yes"
        },
    ]
    # NOTE(review): dict form — BasePrompt's integer indexing of
    # mlmhead_verbalizers would raise KeyError; confirm.
    mlmhead_verbalizers = {
        "0": "No",
        "1": "Yes"
    }
    textual_templates = [
        ["sentence:", """[_eval_]example['sentence']""",
         "grammar correct? " ]
    ]
|
||||
|
||||
|
||||
class RTEPrompt(BasePrompt):
    """Prompt for RTE entailment: label 0 -> yes (entailed), 1 -> no."""
    generation_verbalizers = [
        {
            "0": "yes",
            "1": "no"
        },
    ]
    # NOTE(review): dict form — BasePrompt's integer indexing of
    # mlmhead_verbalizers would raise KeyError; confirm.
    mlmhead_verbalizers = {
        "0": "yes",
        "1": "no"
    }
    textual_templates = [
        ["sentence1:", """[_eval_]example['premise']""", "sentence2:",
         """[_eval_]example['hypothesis']""",
         "The answer was " ]
    ]
|
||||
|
||||
class CBPrompt(BasePrompt):
    """Prompt for SuperGLUE CB three-way entailment: 0/1/2 -> yes/no/maybe."""
    generation_verbalizers = [{
        "0": "yes",
        "1": "no",
        "2": "maybe"
    },
    ]
    # Unlike the sibling prompt classes, mlmhead_verbalizers here is a list of
    # dicts, which is the form BasePrompt's integer indexing actually expects.
    mlmhead_verbalizers = [{
        "0": "yes",
        "1": "no",
        "2": "maybe"
    }]
    textual_templates = [
        ["hypothesis:", """[_eval_]example['hypothesis']""", "premise:",
         """[_eval_]example['premise']""",
         "The answer was " ]
    ]
|
||||
|
||||
# Task name -> prompt class used by AbstractTask.__init__ (tasks file).
# NOTE(review): the CB task class is named "superglue-cb", but the key here is
# "cb" — the lookup for that task would raise KeyError; also tasks such as
# sst2/qqp/mnli have no entry. Confirm intended coverage.
PromptCollections = {
    "mrpc": MRPCPrompt,
    "cola": COLAPrompt,
    "rte": RTEPrompt,
    "superglue-boolq": BoolQPrompt,
    "cb": CBPrompt,
}
|
||||
|
||||
|
||||
|
|
@ -0,0 +1,593 @@
|
|||
from collections import OrderedDict
|
||||
import collections
|
||||
import abc
|
||||
import functools
|
||||
from selectors import EpollSelector
|
||||
from typing import Callable, List, Mapping
|
||||
from examples_prompt.trainers.trainer_utils import pad_punctuation
|
||||
from examples_prompt.metrics import metrics
|
||||
from .utils import round_stsb_target
|
||||
import datasets
|
||||
import logging
|
||||
import numpy as np
|
||||
import torch
|
||||
import re
|
||||
from examples_prompt.data_processors.prompt import PromptCollections
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
class AbstractTask(abc.ABC):
    """One NLP task: dataset loading, deterministic split handling and
    subsampling, plus prompt-based conversion of raw examples into
    seq2seq {'source', 'target', 'task', 'extra_fields'} records."""
    name = NotImplemented          # task identifier; also the PromptCollections key
    config = NotImplemented
    prefix = NotImplemented
    metric = NotImplemented        # list of metric callables
    metric_names = NotImplemented
    split_map = None
    labels_list = None             # string class labels, set by subclasses
    # Default mapping from logical split name to the datasets-library split name.
    split_to_data_split: Mapping[str, str] = \
        {"train": "train", "validation": "validation", "test": "test"}
    # Small datasets without a usable test split: their validation split is
    # halved into validation + test (see get()).
    small_datasets_without_all_splits = ["cola", "wnli", "rte", "superglue-cb", "superglue-copa", "superglue-multirc",
                                         "superglue-wic", "superglue-wsc.fixed", "superglue-rte", "mrpc", "stsb",
                                         "superglue-boolq"]
    # Large datasets: 1000 examples are carved out of train for validation,
    # and the original validation split serves as test.
    large_data_without_all_splits = ["qqp", "qnli", "superglue-record", "sst2"]

    def __init__(self, config, seed=42):
        self.config = config
        self.seed = seed  # seeds the torch.Generator used for shuffling

        # Optional prompt settings read off the config object.
        tid = getattr(config, "template_id", 0)
        vid = getattr(config, "verbalizer_id", 0)
        generation_paradigm = getattr(config, "generation_paradigm", True)
        self.prompt = PromptCollections[self.name](tid, vid, generation_paradigm)

    def get_max_target_length(self, tokenizer, default_max_length):
        """Token length of the longest verbalizer label, else default_max_length."""
        if self.prompt.verbalizer is not None:
            return max([len(tokenizer.encode(label)) for key, label in self.prompt.verbalizer.items()])
        return default_max_length

    def seq2seq_format(self, source, target, extra_fields={}
                       ):
        # NOTE(review): mutable default argument — harmless while never mutated,
        # but `extra_fields=None` would be safer. Callers must pass a dict as the
        # third argument (several subclasses pass a bool here by mistake).
        return {'source': ' '.join(source),
                'target': ' '.join(target),
                'task': self.name,
                'extra_fields': extra_fields
                }

    def check_n_obs(self, n_obs, total_size):
        """Clamp the requested number of observations to the dataset size."""
        if n_obs is not None and n_obs > total_size:
            n_obs = total_size
            logger.warning("n_obs is set to %s", n_obs)
        return n_obs

    def shuffled_indices(self, dataset):
        # Deterministic permutation: the same seed yields the same split
        # everywhere the task is instantiated.
        num_samples = len(dataset)
        generator = torch.Generator()
        generator.manual_seed(self.seed)
        return torch.randperm(num_samples, generator=generator).tolist()

    def subsample(self, dataset, n_obs=None, indices=None):
        """
        Given a dataset returns the subsampled dataset.
        :param n_obs: the number of samples of the subsampled dataset.
        :param indices: indices to select the samples from, if not given, indices are computed
        from by shuffling the given dataset.
        :return: subsampled dataset.
        """
        num_samples = len(dataset)
        n_obs = self.check_n_obs(n_obs, num_samples)
        if indices is None:
            indices = self.shuffled_indices(dataset)
        indices = indices[:n_obs]
        return dataset.select(indices)

    def load_dataset(self, split: str):
        # NOTE(review): annotation corrected from `int` — split names are strings.
        return datasets.load_dataset(self.name, self.config, split=split, script_version="master")

    def get_split_indices(self, split, dataset, validation_size):
        """First `validation_size` shuffled indices become validation; the rest
        go to the other split."""
        indices = self.shuffled_indices(dataset)
        if split == "validation":
            return indices[:validation_size]
        else:
            return indices[validation_size:]


    def map_dataset(self, dataset, add_prefix):
        # `add_prefix` is currently unused — preprocessing is fully prompt-driven.
        # from IPython import embed; embed(header="in get target length")
        return dataset.map(self.preprocessor)


    def preprocessor(self, example):
        """Render one raw example through the task's prompt into seq2seq format."""
        source, target = self.prompt(example)
        return self.seq2seq_format(source, target, extra_fields={})

    def get(self, split, add_prefix=True, n_obs=None, split_validation_test=False):
        """Return the processed dataset for `split`, optionally resplitting to
        create held-out validation/test sets."""
        # For small datasets (n_samples < 10K) without test set, we divide validation set to
        # half, use one half as test set and one half as validation set.
        if split_validation_test and self.name in self.small_datasets_without_all_splits \
                and split != "train":
            mapped_split = self.split_to_data_split["validation"]
            dataset = self.load_dataset(split=mapped_split)
            indices = self.get_split_indices(split, dataset, validation_size=len(dataset)//2)
            dataset = self.subsample(dataset, n_obs, indices)
        # For larger datasets (n_samples > 10K), we divide training set into 1K as
        # validation and the rest as training set, keeping the original validation
        # set as the test set.
        elif split_validation_test and self.name in self.large_data_without_all_splits \
                and split != "test":
            dataset = self.load_dataset(split="train")
            indices = self.get_split_indices(split, dataset, validation_size=1000)
            dataset = self.subsample(dataset, n_obs, indices)
        else:
            mapped_split = self.split_to_data_split[split]
            dataset = self.load_dataset(split=mapped_split)
            # shuffles the data and samples it.
            if n_obs is not None:
                dataset = self.subsample(dataset, n_obs)
        return self.map_dataset(dataset, add_prefix)
|
||||
|
||||
class Squad(AbstractTask):
    """SQuAD extractive QA, formatted as question/context -> first gold answer."""
    name = "squad"
    metric = [metrics.squad]

    def load_dataset(self, split):
        return datasets.load_dataset(self.name, split=split, script_version="master")

    def preprocessor(self, example, add_prefix=True):
        # FIX: add_prefix had no default, so dataset.map(self.preprocessor)
        # (which passes only `example`) would raise TypeError.
        # pad_punctuation mirrors T5's punctuation-spacing preprocessing.
        answer = pad_punctuation(example['answers']['text'][0])
        question = pad_punctuation(example['question'])
        context = pad_punctuation(example['context'])
        source = ["question:", question,
                  "context:", context]
        target = [answer]
        # FIX: seq2seq_format's third positional parameter is `extra_fields`,
        # not add_prefix — passing the bool silently replaced the dict.
        return self.seq2seq_format(source, target)
|
||||
|
||||
|
||||
class MRPC(AbstractTask):
    """GLUE MRPC paraphrase-detection task (binary labels)."""
    name = "mrpc"
    labels_list = ["0", "1"]
    metric = [metrics.f1_score, metrics.accuracy]
    metric_names = ["f1", "accuracy"]
    # The validation split is reused as the test split.
    split_to_data_split = {"train": "train",
                           "validation": "validation",
                           "test": "validation"}

    def load_dataset(self, split):
        return datasets.load_dataset('glue', 'mrpc', split=split, script_version="master")

    # Preprocessing is handled by the prompt via AbstractTask.preprocessor;
    # the old manual version is kept below for reference.
    # def preprocessor(self, example, add_prefix=True):
    #     src_texts = ["sentence1:", example['sentence1'],
    #                  "sentence2:", example["sentence2"]]
    #     tgt_texts = [str(example['label'])]
    #     return self.seq2seq_format(src_texts, tgt_texts, add_prefix)
|
||||
|
||||
|
||||
class COLA(AbstractTask):
    """GLUE CoLA grammatical-acceptability task (binary labels)."""
    name = "cola"
    labels_list = ["0", "1"]
    metric = [metrics.matthews_corrcoef]
    metric_names = ["matthews_correlation"]
    # The validation split is reused as the test split.
    split_to_data_split = {"train": "train",
                           "validation": "validation",
                           "test": "validation"}

    def load_dataset(self, split):
        return datasets.load_dataset('glue', 'cola',
                                     split=split, script_version="master")

    # Preprocessing is handled by the prompt via AbstractTask.preprocessor;
    # the old manual version is kept below for reference.
    # def preprocessor(self, example, add_prefix=True):
    #     src_texts = ["sentence:", example['sentence']]
    #     tgt_texts = [str(example['label'])]
    #     return self.seq2seq_format(src_texts, tgt_texts, add_prefix)
|
||||
|
||||
|
||||
class SST2(AbstractTask):
    """GLUE SST-2 sentiment task (binary labels)."""
    name = "sst2"
    labels_list = ["0", "1"]
    metric = [metrics.accuracy]
    metric_names = ["accuracy"]
    # The validation split is reused as the test split.
    split_to_data_split = {"train": "train",
                           "validation": "validation",
                           "test": "validation"}

    def load_dataset(self, split):
        return datasets.load_dataset('glue', 'sst2',
                                     split=split, script_version="master")

    def preprocessor(self, example, add_prefix=True):
        src_texts = ["sentence:", example['sentence']]
        tgt_texts = [str(example['label'])]
        # FIX: seq2seq_format's third positional parameter is `extra_fields`,
        # not add_prefix — passing the bool silently replaced the dict.
        return self.seq2seq_format(src_texts, tgt_texts)
|
||||
|
||||
|
||||
class STSB(AbstractTask):
    """GLUE STS-B similarity regression; targets rounded to steps of 0.2."""
    name = "stsb"
    # All valid rounded similarity scores: 0.0 .. 5.0 in 0.2 increments.
    labels_list = [str(np.round(label, decimals=1)) for label in np.arange(0, 5.2, 0.2)]
    metric = [metrics.pearson_corrcoef, metrics.spearman_corrcoef]
    metric_names = ["pearson", "spearmanr"]
    # The validation split is reused as the test split.
    split_to_data_split = {"train": "train",
                           "validation": "validation",
                           "test": "validation"}

    def load_dataset(self, split):
        return datasets.load_dataset('glue', 'stsb',
                                     split=split, script_version="master")

    def preprocessor(self, example, add_prefix=True):
        src_texts = ["sentence1:", example['sentence1'],
                     "sentence2:", example["sentence2"]]
        tgt_texts = [str(round_stsb_target(example['label']))]
        # FIX: seq2seq_format's third positional parameter is `extra_fields`,
        # not add_prefix — passing the bool silently replaced the dict.
        return self.seq2seq_format(src_texts, tgt_texts)
|
||||
|
||||
|
||||
class QQP(AbstractTask):
    """GLUE QQP duplicate-question task (binary labels)."""
    name = "qqp"
    labels_list = ["0", "1"]
    metric = [metrics.f1_score, metrics.accuracy]
    metric_names = ["f1", "accuracy"]
    # The validation split is reused as the test split.
    split_to_data_split = {"train": "train",
                           "validation": "validation",
                           "test": "validation"}

    def load_dataset(self, split):
        return datasets.load_dataset('glue', 'qqp',
                                     split=split, script_version="master")

    def preprocessor(self, example, add_prefix=True):
        src_texts = ["question1:", example['question1'],
                     "question2:", example["question2"]]
        tgt_texts = [str(example['label'])]
        # FIX: seq2seq_format's third positional parameter is `extra_fields`,
        # not add_prefix — passing the bool silently replaced the dict.
        return self.seq2seq_format(src_texts, tgt_texts)
|
||||
|
||||
|
||||
class MNLI(AbstractTask):
    """GLUE MNLI three-way entailment task."""
    name = "mnli"
    labels_list = ["0", "1", "2"]
    # mismatched validation serves as validation, matched as test.
    split_to_data_split = {"train": "train",
                           "validation": "validation_mismatched",
                           "test": "validation_matched"}
    metric = [metrics.accuracy]
    metric_names = ["accuracy"]


    def load_dataset(self, split):
        return datasets.load_dataset('glue', 'mnli', split=split, script_version="master")

    def preprocessor(self, example, add_prefix=True):
        src_texts = ["premise:", example['premise'],
                     "hypothesis", example["hypothesis"]]
        tgt_texts = [str(example['label'])]
        # FIX: seq2seq_format's third positional parameter is `extra_fields`,
        # not add_prefix — passing the bool silently replaced the dict.
        return self.seq2seq_format(src_texts, tgt_texts)
|
||||
|
||||
|
||||
class QNLI(AbstractTask):
    """GLUE QNLI question-entailment task (binary labels)."""
    name = "qnli"
    labels_list = ["0", "1"]
    metric = [metrics.accuracy]
    metric_names = ["accuracy"]
    # The validation split is reused as the test split.
    split_to_data_split = {"train": "train",
                           "validation": "validation",
                           "test": "validation"}

    def load_dataset(self, split):
        return datasets.load_dataset('glue', 'qnli', split=split, script_version="master")

    def preprocessor(self, example, add_prefix=True):
        src_texts = ["question:", example['question'],
                     "sentence:", example["sentence"]]
        tgt_texts = [str(example['label'])]
        # FIX: seq2seq_format's third positional parameter is `extra_fields`,
        # not add_prefix — passing the bool silently replaced the dict.
        return self.seq2seq_format(src_texts, tgt_texts)
|
||||
|
||||
class RTE(AbstractTask):
    """GLUE RTE entailment task (binary labels)."""
    name = "rte"
    labels_list = ["0", "1"]
    metric = [metrics.accuracy]
    metric_names = ["accuracy"]
    # The validation split is reused as the test split.
    split_to_data_split = {"train": "train",
                           "validation": "validation",
                           "test": "validation"}

    def load_dataset(self, split):
        return datasets.load_dataset('glue', 'rte',
                                     split=split, script_version="master")

    def preprocessor(self, example, add_prefix=True):
        src_texts = ["sentence1:", example['sentence1'],
                     "sentence2:", example["sentence2"]]
        tgt_texts = [str(example['label'])]
        # FIX: seq2seq_format's third positional parameter is `extra_fields`,
        # not add_prefix — passing the bool silently replaced the dict.
        return self.seq2seq_format(src_texts, tgt_texts)
|
||||
|
||||
|
||||
class WNLI(AbstractTask):
    """GLUE WNLI coreference/entailment task (binary labels)."""
    name = "wnli"
    labels_list = ["0", "1"]
    metric = [metrics.accuracy]
    metric_names = ["accuracy"]
    # The validation split is reused as the test split.
    split_to_data_split = {"train": "train",
                           "validation": "validation",
                           "test": "validation"}

    def load_dataset(self, split):
        return datasets.load_dataset('glue', 'wnli', split=split, script_version="master")

    def preprocessor(self, example, add_prefix=True):
        src_texts = ["sentence1:", example['sentence1'],
                     "sentence2:", example["sentence2"]]
        tgt_texts = [str(example['label'])]
        # FIX: seq2seq_format's third positional parameter is `extra_fields`,
        # not add_prefix — passing the bool silently replaced the dict.
        return self.seq2seq_format(src_texts, tgt_texts)
|
||||
|
||||
|
||||
class SuperGLUEBoolQ(AbstractTask):
    """SuperGLUE BoolQ yes/no question answering (binary labels)."""
    name="superglue-boolq"
    labels_list = ['0', '1']
    metric = [metrics.accuracy]
    metric_names = ["accuracy"]
    # The validation split is reused as the test split.
    split_to_data_split = {"train": "train",
                           "validation": "validation",
                           "test": "validation"}

    def load_dataset(self, split):
        return datasets.load_dataset('super_glue', 'boolq', split=split, script_version="master")

    def preprocessor(self, example, add_prefix=True):
        src_texts = ["question:", example["question"], "passage:", example["passage"]]
        tgt_texts = [str(example["label"])]
        # FIX: seq2seq_format's third positional parameter is `extra_fields`,
        # not add_prefix — passing the bool silently replaced the dict.
        return self.seq2seq_format(src_texts, tgt_texts)
|
||||
|
||||
|
||||
class SuperGLUERTE(AbstractTask):
    """SuperGLUE RTE entailment task (binary labels)."""
    name="superglue-rte"
    labels_list = ['0', '1']
    # The validation split is reused as the test split.
    split_to_data_split = {"train": "train",
                           "validation": "validation",
                           "test": "validation"}
    metric = [metrics.accuracy]
    metric_names = ["accuracy"]

    def load_dataset(self, split):
        return datasets.load_dataset('super_glue', 'rte', split=split, script_version="master")

    def preprocessor(self, example, add_prefix=True):
        src_texts = ["premise:", example["premise"],
                     "hypothesis:", example["hypothesis"]]
        tgt_texts = [str(example["label"])]
        # FIX: seq2seq_format's third positional parameter is `extra_fields`,
        # not add_prefix — passing the bool silently replaced the dict.
        return self.seq2seq_format(src_texts, tgt_texts)
|
||||
|
||||
|
||||
class SuperGLUECB(AbstractTask):
    """SuperGLUE CB three-way entailment task."""
    name = "superglue-cb"
    labels_list = ['0', '1', '2']
    # The validation split is reused as the test split.
    split_to_data_split = {"train": "train",
                           "validation": "validation",
                           "test": "validation"}
    metric = [metrics.mean_multiclass_f1(num_classes=3), metrics.accuracy]
    metric_names = ["f1_multiclass", "accuracy"]

    def load_dataset(self, split):
        return datasets.load_dataset('super_glue', 'cb', split=split, script_version="master")

    def preprocessor(self, example, add_prefix=True):
        src_texts = ["premise:", example["premise"], "hypothesis:", example["hypothesis"]]
        tgt_texts = [str(example["label"])]
        # FIX: seq2seq_format's third positional parameter is `extra_fields`,
        # not add_prefix — passing the bool silently replaced the dict.
        return self.seq2seq_format(src_texts, tgt_texts)
|
||||
|
||||
|
||||
class SuperGLUECOPA(AbstractTask):
    """SuperGLUE COPA causal-choice task (binary labels)."""
    name = "superglue-copa"
    labels_list = ['0', '1']
    # The validation split is reused as the test split.
    split_to_data_split = {"train": "train",
                           "validation": "validation",
                           "test": "validation"}
    metric = [metrics.accuracy]
    metric_names = ["accuracy"]

    def load_dataset(self, split):
        return datasets.load_dataset('super_glue', 'copa', split=split, script_version="master")

    def preprocessor(self, example, add_prefix=True):
        src_texts = ["premise:", example["premise"],
                     "choice1:", example["choice1"],
                     "choice2:", example["choice2"]]
        tgt_texts = [str(example["label"])]
        # FIX: seq2seq_format's third positional parameter is `extra_fields`,
        # not add_prefix — passing the bool silently replaced the dict.
        return self.seq2seq_format(src_texts, tgt_texts)
|
||||
|
||||
|
||||
class SuperGLUEMultiRC(AbstractTask):
    """SuperGLUE MultiRC multi-answer reading comprehension; records the
    question-group id so grouped metrics (EM over question groups) work."""
    name = "superglue-multirc"
    labels_list = ['0', '1']
    # The validation split is reused as the test split.
    split_to_data_split = {"train": "train",
                           "validation": "validation",
                           "test": "validation"}
    metric = [metrics.multirc_f1_over_all_answers,
              metrics.mean_group_metric(metrics.exact_match)]
    metric_names = ["f1", "em"]

    def load_dataset(self, split):
        return datasets.load_dataset('super_glue', 'multirc', split=split, script_version="master")

    def remove_markup(self, text):
        """Removes the HTML markup."""
        text = re.sub('<br>', ' ', text)
        text = re.sub('<(/)?b>', '', text)
        return text

    def preprocessor(self, example, add_prefix=True):
        group = example['idx']['question']
        # T5 applies remove_markup to the joined string, but this should not make
        # any difference as well.
        # https://github.com/google-research/text-to-text-transfer-transformer/blob/a1352e625db7ec114062f99d99b0565b9e45c155/t5/data/preprocessors.py#L797
        src_texts = ["question:", self.remove_markup(example["question"]),
                     "answer:", self.remove_markup(example["answer"]),
                     "paragraph:", self.remove_markup(example["paragraph"])]
        tgt_texts = [str(example["label"])]
        # FIX: the original call passed add_prefix positionally (landing in the
        # extra_fields slot) AND extra_fields as a keyword, which raises
        # TypeError: got multiple values for argument 'extra_fields'.
        return self.seq2seq_format(src_texts, tgt_texts, extra_fields={"group": group})
|
||||
|
||||
|
||||
|
||||
class SuperGLUEWIC(AbstractTask):
    """SuperGLUE WiC word-in-context disambiguation task (binary labels)."""
    name = "superglue-wic"
    labels_list = ['0', '1']
    # The validation split is reused as the test split.
    split_to_data_split = {"train": "train",
                           "validation": "validation",
                           "test": "validation"}
    metric = [metrics.accuracy]
    metric_names = ["accuracy"]

    def load_dataset(self, split):
        return datasets.load_dataset('super_glue', 'wic', split=split, script_version="master")

    def preprocessor(self, example, add_prefix=True):
        src_texts = ["sentence1:", example["sentence1"],
                     "sentence2:", example["sentence2"],
                     "word:", example["word"]]
        tgt_texts = [str(example["label"])]
        # FIX: seq2seq_format's third positional parameter is `extra_fields`,
        # not add_prefix — passing the bool silently replaced the dict.
        return self.seq2seq_format(src_texts, tgt_texts)
|
||||
|
||||
|
||||
class SuperGLUEWSCFixed(AbstractTask):
|
||||
# source: https://github.com/google-research/text-to-text-transfer-transformer/blob/master/t5/data/preprocessors.py
|
||||
"""Convert WSC examples to text2text format.
|
||||
WSC includes a sentence along with 2 'spans': the first denoting a noun and
|
||||
the other a pronoun. The 'label' specifies whether or not the pronoun is
|
||||
referencing the noun. This preprocessor puts ' * ' around the noun and ' # '
|
||||
around the pronoun.
|
||||
For example, a typical example from WSC might look like
|
||||
{
|
||||
'text': 'This is a test sentence .',
|
||||
'span1_text': 'test',
|
||||
'span1_index': 3,
|
||||
'span2_text': 'This',
|
||||
'span2_index': 0,
|
||||
'label': 0
|
||||
}
|
||||
This example would be transformed to
|
||||
{
|
||||
'inputs': 'wsc text: # This # is a * test * sentence .',
|
||||
'targets': 'False'
|
||||
}
|
||||
"""
|
||||
name = "superglue-wsc.fixed"
|
||||
labels_list = ['0', '1']
|
||||
split_to_data_split = {"train": "train",
|
||||
"validation": "validation",
|
||||
"test": "validation"}
|
||||
metric = [metrics.accuracy]
|
||||
metric_names = ["accuracy"]
|
||||
|
||||
def load_dataset(self, split):
|
||||
return datasets.load_dataset('super_glue', 'wsc.fixed', split=split, script_version="master")
|
||||
|
||||
def _mark_span(self, text, span_str, span_idx, mark):
|
||||
pattern_tmpl = r'^((?:\S+\s){N})(W)'
|
||||
pattern = re.sub('N', str(span_idx), pattern_tmpl)
|
||||
pattern = re.sub('W', span_str, pattern)
|
||||
return re.sub(pattern, r'\1{0} \2 {0}'.format(mark), text)
|
||||
|
||||
def preprocessor(self, example, add_prefix=True):
|
||||
# converts text as done in T5.
|
||||
text = example['text']
|
||||
text = self._mark_span(text, example['span1_text'], example['span1_index'], '*')
|
||||
# Compensate for 2 added "words" added in previous step.
|
||||
span2_index = example['span2_index'] + 2 * int(example['span1_index'] < example['span2_index'])
|
||||
text = self._mark_span(text, example['span2_text'], span2_index, '#')
|
||||
src_texts = ["text:", text]
|
||||
tgt_texts = [str(example["label"])]
|
||||
return self.seq2seq_format(src_texts, tgt_texts, add_prefix)
|
||||
|
||||
|
||||
class SuperGLUERecord(AbstractTask):
    """Convert ReCoRD examples to text2text examples.

    ReCoRD contains a passage, query containing a '@placeholder' string, and a set
    of entities that are the possible values of the placeholder. Each train and
    validation example will have a list of answers, any of which would be
    considered correct.

    For example, a typical example from ReCoRD might look like
    {
        'passsage': 'This is the passage.',
        'query': 'A @placeholder is a bird.',
        'entities': ['penguin', 'potato', 'pigeon'],
        'answers': ['penguin', 'pigeon'],
    }
    which this preprocessor would turn into the following two examples:
    {
        'inputs': 'record query: A @placeholder is a bird. entities: penguin, '
                  'potato, pigeon passage: This is the passage.',
        'targets': 'penguin',
    }
    and
    {
        'inputs': 'record query: A @placeholder is a bird. entities: penguin, '
                  'potato, pigeon passage: This is the passage.',
        'targets': 'pigeon',
    }
    """
    name = "superglue-record"
    # SuperGLUE test labels are hidden; evaluation and test both use the
    # validation split.
    split_to_data_split = {"train": "train",
                           "validation": "validation",
                           "test": "validation"}
    metric = [metrics.squad]
    metric_names = ["squad"]

    def load_dataset(self, split):
        # NOTE(review): `script_version` is the legacy name of `revision` in
        # newer `datasets` releases -- confirm against the pinned version.
        return datasets.load_dataset('super_glue', 'record', split=split, script_version="master")

    def preprocessor(self, batch, add_prefix=True):
        """Batched preprocessor that may emit several output rows per example.

        Each example is duplicated once per answer (or once with target
        "<unk>" when it has no answers), which is why this runs with
        `batched=True` instead of the per-example path.
        """
        new_batch = collections.defaultdict(list)
        keys = batch.keys()
        # Re-assemble per-example dicts out of the columnar batch layout.
        for values in zip(*batch.values()):
            ex = {k: v for k, v in zip(keys, values)}
            # updates the passage.
            passage = ex['passage']
            # Turn "@highlight" markers into sentence separators.
            passage = re.sub(r'(\.|\?|\!|\"|\')\n@highlight\n', r'\1 ', passage)
            passage = re.sub(r'\n@highlight\n', '. ', passage)
            inputs = f"record query: {ex['query']} entities: {', '.join(ex['entities'])} passage: {passage}"
            if add_prefix:
                inputs = self.name + " " + inputs
            # duplicates the samples based on number of answers.
            num_answers = len(ex["answers"])
            num_duplicates = np.maximum(1, num_answers)
            new_batch["source"].extend([inputs] * num_duplicates)
            new_batch["target"].extend(ex["answers"] if num_answers > 0 else ["<unk>"])
            new_batch["task"].extend([self.name] * num_duplicates)
            new_batch["extra_fields"].extend([{"answers": ex["answers"]}]*num_duplicates)
        return new_batch

    def map_dataset(self, dataset, add_prefix=True):
        # Overrides the default mapping so the batched preprocessor can change
        # the row count; original columns are dropped.
        return dataset.map(functools.partial(self.preprocessor, add_prefix=add_prefix),
                           batched=True, remove_columns=dataset.column_names)
|
||||
|
||||
|
||||
# Registry from task-name string (as used in configs / the command line) to
# its task class. An OrderedDict keeps registration order stable for
# iteration and error messages.
TASK_MAPPING = OrderedDict(
    [
        ('squad', Squad),
        ('mrpc', MRPC),
        ('cola', COLA),
        ('sst2', SST2),
        ('qnli', QNLI),
        ('rte', RTE),
        ('wnli', WNLI),
        ('mnli', MNLI),
        ('qqp', QQP),
        ('stsb', STSB),
        ('superglue-boolq', SuperGLUEBoolQ),
        ('superglue-rte', SuperGLUERTE),
        ('superglue-cb', SuperGLUECB),
        ('superglue-copa', SuperGLUECOPA),
        ('superglue-multirc', SuperGLUEMultiRC),
        ('superglue-wic', SuperGLUEWIC),
        ('superglue-wsc.fixed', SuperGLUEWSCFixed),
        ('superglue-record', SuperGLUERecord)
    ]
)
|
||||
|
||||
class AutoTask:
    """Factory for the task processors registered in `TASK_MAPPING`."""

    @classmethod
    def get(cls, task, config, seed=42):
        """Instantiate the task class registered under `task`.

        Args:
          task: registry key, e.g. "mrpc" or "superglue-record".
          config: configuration object forwarded to the task constructor.
          seed: random seed forwarded to the task constructor.

        Returns:
          An instance of the matching task class.

        Raises:
          ValueError: if `task` is not a known registry key.
        """
        if task in TASK_MAPPING:
            return TASK_MAPPING[task](config, seed)
        # Fixed: the previous message had more `{}` placeholders than
        # `.format` arguments, so reaching this path raised IndexError
        # instead of the intended ValueError.
        raise ValueError(
            "Unrecognized task {}.\n"
            "Task name should be one of {}.".format(
                task, ", ".join(TASK_MAPPING.keys())
            )
        )
|
|
@ -0,0 +1,17 @@
|
|||
import numpy as np
|
||||
|
||||
def round_stsb_target(label):
    """STSB maps two sentences to a floating point number between 1 and 5
    representing their semantic similarity. Since we are treating all tasks as
    text-to-text tasks we need to convert this floating point number to a string.
    The vast majority of the similarity score labels in STSB are in the set
    [0, 0.2, 0.4, ..., 4.8, 5.0]. So, we first round the number to the closest
    entry in this set, and then we convert the result to a string (literally e.g.
    "3.4"). This converts STSB roughly into a 26-class classification dataset.
    Args:
      label: original label.
    Returns:
      A preprocessed label.
    """
    # Snap to the nearest multiple of 0.2, as documented above. The previous
    # expression `np.round((label * 5) / 5, decimals=1)` reduced to
    # `np.round(label, 1)` -- a 0.1 grid -- contradicting the docstring and
    # the original T5 preprocessing (`tf.round(label * 5) / 5`).
    return np.round(label * 5) / 5
|
||||
|
|
@ -0,0 +1,175 @@
|
|||
# several of the evaluation metrics are from https://github.com/google-research/text-to-text-transfer-transformer/blob/a1352e625db7ec114062f99d99b0565b9e45c155/t5/evaluation/metrics.py
|
||||
"""Defines different metrics used for evaluation of tasks."""
|
||||
import numpy as np
|
||||
import scipy
|
||||
import math
|
||||
import sklearn
|
||||
import collections
|
||||
from logging import getLogger
|
||||
from .qa_utils import normalize_squad, qa_metrics
|
||||
import sklearn.metrics
|
||||
|
||||
logger = getLogger(__name__)
|
||||
|
||||
def accuracy(predictions, targets) -> dict:
    """Return the percentage of predictions that exactly match their targets."""
    matches = np.array(predictions) == np.array(targets)
    return {"accuracy": 100 * matches.mean()}
|
||||
|
||||
def pearson_corrcoef(predictions, targets) -> dict:
    """Computes Pearson correlation coefficient (as a percentage).

    Predictions and targets arrive as decoded strings, so both are parsed
    to floats before computing the statistic.
    """
    from examples_seq2seq.data_processors.postprocessors import string_to_float
    # The file-level `import scipy` does not guarantee the `scipy.stats`
    # submodule is loaded; import it explicitly before use.
    import scipy.stats
    targets = [string_to_float(target) for target in targets]
    predictions = [string_to_float(prediction) for prediction in predictions]
    pearson_corrcoef = 100 * scipy.stats.pearsonr(targets, predictions)[0]

    # If all the predictions are identical the correlation is NaN; guard
    # against this and report 0 so aggregation does not propagate NaNs.
    if math.isnan(pearson_corrcoef):
        pearson_corrcoef = 0
    return {"pearson": pearson_corrcoef}
|
||||
|
||||
|
||||
def spearman_corrcoef(predictions, targets) -> dict:
    """Computes Spearman correlation coefficient (as a percentage)."""
    # TODO: we need to do postprocessors in a clean way for each dataset.
    from examples_seq2seq.data_processors.postprocessors import string_to_float
    # The file-level `import scipy` does not guarantee the `scipy.stats`
    # submodule is loaded; import it explicitly before use.
    import scipy.stats
    targets = [string_to_float(target) for target in targets]
    predictions = [string_to_float(prediction) for prediction in predictions]
    spearman_corrcoef = 100 * scipy.stats.spearmanr(targets, predictions)[0]

    # If all the predictions are identical the correlation is NaN; guard
    # against this and report 0 so aggregation does not propagate NaNs.
    if math.isnan(spearman_corrcoef):
        spearman_corrcoef = 0
    return {"spearmanr": spearman_corrcoef}
|
||||
|
||||
def transform_for_generation(predictions, targets):
    """Map string labels to integer ids so classification metrics apply.

    The targets define the label vocabulary. A prediction outside the
    vocabulary is mapped to an id guaranteed to differ from its paired
    target, i.e. it counts as wrong.

    Returns:
      (predictions, targets) as integer numpy arrays.
    """
    # Sort the label set so the id assignment is deterministic across runs;
    # plain `set` iteration order depends on (randomized) string hashing.
    mapping = {k: i for i, k in enumerate(sorted(set(targets)))}

    targets = np.asarray([mapping[k] for k in targets])
    predictions = np.asarray([mapping[k] if k in mapping else (t + 1) % len(mapping)
                              for t, k in zip(targets, predictions)])

    return predictions, targets
|
||||
|
||||
|
||||
|
||||
def f1_score(predictions, targets) -> dict:
    """Computes F1 score, with any prediction != 0 or 1 is counted as incorrect.
    Args:
      targets: list of targets, either 0 or 1
      predictions: list of predictions, any integer value
    Returns:
      F1 score, where any prediction != 0 or 1 is counted as wrong.
    """
    # Both inputs are expected to be numpy arrays (they are cast via
    # `.astype`); delegate the actual scoring to sklearn.
    y_true = targets.astype(np.int32)
    y_pred = predictions.astype(np.int32)
    return {"f1": 100 * sklearn.metrics.f1_score(y_true, y_pred)}
|
||||
|
||||
# TODO: maybe gaurd against invalid values https://stackoverflow.com/questions/56865344/how-do-i-calculate-the-matthews-correlation-coefficient-in-tensorflow
|
||||
def matthews_corrcoef(predictions, targets) -> dict:
    """Computes the Matthews correlation coefficient."""
    score = sklearn.metrics.matthews_corrcoef(targets, predictions)
    return {"matthews_correlation": 100 * score}
|
||||
|
||||
def squad(predictions, targets):
    """Computes SQuAD metrics, maximizing over answers per question.
    Args:
      targets: list of lists of strings
      predictions: list of strings
    Returns:
      dict with score_key: squad score across all targets and predictions
    """
    normalized_targets = [[normalize_squad(answer) for answer in answers]
                          for answers in targets]
    normalized_predictions = [normalize_squad(p) for p in predictions]
    return qa_metrics(normalized_targets, normalized_predictions)
|
||||
|
||||
|
||||
def exact_match(predictions, targets):
    """Computes whether the targets match predictions exactly."""
    identical = np.array_equal(targets, predictions)
    return {"em": 100 * float(identical)}
|
||||
|
||||
|
||||
def sklearn_metrics_wrapper(metric_str,
                            metric_dict_str=None,
                            metric_post_process_fn=None,
                            **metric_fn_kwargs):
    """Wraps any sklearn.metric function and returns a t5 metric function.
    Args:
      metric_str: string, the function from `sklearn.metrics` to use.
      metric_dict_str: optional string, if not specified `metric_str` is used as
        the key in the returned dictionary.
      metric_post_process_fn: callable, if specified the final computed metric
        will be passed through this.
      **metric_fn_kwargs: kwargs, passed to the metric function we are calling.
    Returns:
      the function that calculates the metric in a dict.
    """
    # Fail fast at wrapper-construction time if the metric name is unknown.
    if not hasattr(sklearn.metrics, metric_str):
        raise ValueError("sklearn.metrics does not have: %s" % metric_str)

    result_key = metric_dict_str or metric_str

    def fn(predictions, targets):
        metric_fn = getattr(sklearn.metrics, metric_str)
        value = metric_fn(targets, predictions, **metric_fn_kwargs)
        if metric_post_process_fn is not None:
            value = metric_post_process_fn(value)
        return {result_key: value}

    return fn
|
||||
|
||||
|
||||
def mean_multiclass_f1(num_classes, **metric_fn_kwargs):
    """Computes the unweighted average of the F1 per class.

    Delegates to sklearn's `fbeta_score` with beta=1 (plain F1) and macro
    averaging over the `num_classes` labels; the post-process hook scales
    the result to a percentage.
    """
    return sklearn_metrics_wrapper(
        "fbeta_score",
        metric_dict_str="f1_multiclass",
        metric_post_process_fn=lambda x: 100 * x,
        beta=1,
        labels=range(num_classes),
        average="macro",
        **metric_fn_kwargs)
|
||||
|
||||
|
||||
def multirc_f1_over_all_answers(targets, predictions):
    """Special metric for MultiRC which computes F1 score over all examples.
    This is necessary because the targets/predictions for MultiRC are dicts and
    the f1_score_with_invalid expects a list of True/False labels, not dicts. As
    a result we just need to key in the "value" for each of the example dicts
    before feeding into f1_score_with_invalid.
    Args:
      targets: list of dicts, where each dict has a "value" key.
      predictions: list of dicts, where each dict has a "value" key.
    Returns:
      F1 score over values, where any prediction != 0 or 1 is counted as wrong.
    """
    # NOTE(review): `f1_score_with_invalid` is not defined in this module
    # (only `f1_score` is) -- confirm it is available at call time, otherwise
    # this raises NameError.
    return f1_score_with_invalid(
        [t["value"] for t in targets], [p["value"] for p in predictions]
    )
|
||||
|
||||
|
||||
def mean_group_metric(metric_fn, group_key="group", value_key="value"):
    """Returns a metric that averages `metric_fn` on sub-groups of results.
    The sub-groups are defined by aggregating results (targets and predictions)
    by accessing the feature specified by `group_key` in the target dicts.
    **WARNING**: Using this function can produce unreliable results if you do not
    pass in full groups. For example, if you evaluate over a random subsample of a
    validation set and do not retain all of the examples in each group, you may
    get results which aren't directly comparable to using the full validation set.
    Args:
      metric_fn: function, the metric to compute on the subgroups.
      group_key: string, the key for the grouping value in the target dictionary.
      value_key: string, the key for the value in the dictionaries.
    """
    def my_metric(targets, predictions):
        """Computes mean of `metric_fn` over subgroups of results."""
        # Bucket (target, prediction) value pairs by the target's group key.
        per_group = collections.defaultdict(lambda: ([], []))
        for target, prediction in zip(targets, predictions):
            group_targets, group_predictions = per_group[target[group_key]]
            group_targets.append(target[value_key])
            group_predictions.append(prediction[value_key])
        # Score each group, then average every metric across groups.
        per_metric_scores = collections.defaultdict(list)
        for group_targets, group_predictions in per_group.values():
            for name, score in metric_fn(group_targets, group_predictions).items():
                per_metric_scores[name].append(score)
        return {name: np.mean(scores) for name, scores in per_metric_scores.items()}
    return my_metric
|
|
@ -0,0 +1,96 @@
|
|||
# Copyright 2021 The T5 Authors.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# source: the codes are from https://github.com/google-research/text-to-text-transfer-transformer
|
||||
"""Utilities for Question Answering (QA) evaluation.
|
||||
Matches results on the SQuAD (v1.1) and TriviaQA (v1.0) evaluation scripts.
|
||||
"""
|
||||
|
||||
import collections
|
||||
import string
|
||||
import regex as re
|
||||
import numpy as np
|
||||
|
||||
|
||||
def _normalize_answer(text, punc_chars, punc_repl):
    """Lower text and remove punctuation, articles and extra whitespace.

    The operation order (lowercase, punctuation replacement, article
    removal, whitespace collapsing) matches the official SQuAD/TriviaQA
    evaluation scripts.
    """
    punct_set = set(punc_chars)

    lowered = text.lower()
    no_punct = "".join(punc_repl if ch in punct_set else ch for ch in lowered)
    no_articles = re.sub(r"\b(a|an|the)\b", " ", no_punct)
    return " ".join(no_articles.split())
|
||||
|
||||
|
||||
def normalize_trivia_qa(answer):
    """Normalization used in official TriviaQA evaluation script."""
    # TriviaQA extends ASCII punctuation with a few Unicode quote/accent
    # characters and replaces punctuation with spaces (then strips edges).
    punct = string.punctuation + "‘’´`_"
    return _normalize_answer(answer, punc_chars=punct, punc_repl=" ").strip()
|
||||
|
||||
|
||||
def normalize_squad(answer):
    """Normalization used in official SQuAD evaluation script.

    Unlike TriviaQA normalization, punctuation is deleted outright
    (replaced with the empty string) rather than with spaces.
    """
    return _normalize_answer(answer, punc_chars=string.punctuation, punc_repl="")
|
||||
|
||||
|
||||
def _metric_max_over_ground_truths(metric_fn, ground_truths, prediction):
    """Computes the maximum of the metric over all ground truths."""
    scores = (metric_fn(ground_truth, prediction)
              for ground_truth in ground_truths)
    return max(scores)
|
||||
|
||||
|
||||
def _exact_match_score(target, prediction):
    """Return True iff the prediction equals the target exactly."""
    return prediction == target
|
||||
|
||||
|
||||
def _f1_score(target, prediction):
    """Computes token f1 score for a single target and prediction."""
    prediction_counts = collections.Counter(prediction.split())
    target_counts = collections.Counter(target.split())
    # Multiset intersection counts tokens shared between the two strings.
    overlap = sum((prediction_counts & target_counts).values())
    if overlap == 0:
        return 0
    precision = 1.0 * overlap / sum(prediction_counts.values())
    recall = 1.0 * overlap / sum(target_counts.values())
    return (2 * precision * recall) / (precision + recall)
|
||||
|
||||
|
||||
def qa_metrics(targets, predictions):
    """Computes exact match and f1 QA scores, expecting pre-normalized text.

    For each prediction, both scores are maximized over that question's
    list of ground-truth answers, then averaged and scaled to percent.
    """
    if len(targets) != len(predictions):
        raise ValueError("Number of targets and predictions must match.")
    em_scores = [_metric_max_over_ground_truths(_exact_match_score, t, p)
                 for p, t in zip(predictions, targets)]
    f1_scores = [_metric_max_over_ground_truths(_f1_score, t, p)
                 for p, t in zip(predictions, targets)]
    return {"em": 100 * np.mean(em_scores), "f1": 100 * np.mean(f1_scores)}
|
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
|
@ -0,0 +1,42 @@
|
|||
{
|
||||
"batch_size": 64,
|
||||
"dataset_config_name": [
|
||||
"en"
|
||||
],
|
||||
"delta_type": "bitfit",
|
||||
"do_eval": true,
|
||||
"do_test": true,
|
||||
"do_train": true,
|
||||
"eval_dataset_config_name": [
|
||||
"en"
|
||||
],
|
||||
"eval_dataset_name": "mrpc",
|
||||
"eval_steps": 200,
|
||||
"evaluation_strategy": "steps",
|
||||
"gradient_accumulation_steps": 1,
|
||||
"greater_is_better": true,
|
||||
"learning_rate": 0.0002561697332863371,
|
||||
"load_best_model_at_end": true,
|
||||
"max_source_length": 128,
|
||||
"max_steps": 5000,
|
||||
"metric_for_best_model": "average_metrics",
|
||||
"model_name_or_path": "t5-base",
|
||||
"output_dir": "outputs_search/bitfit.mrpc.t5-base/10940816",
|
||||
"overwrite_output_dir": true,
|
||||
"per_device_eval_batch_size": 64,
|
||||
"per_device_train_batch_size": 64,
|
||||
"predict_with_generate": true,
|
||||
"push_to_hub": false,
|
||||
"save_steps": 200,
|
||||
"save_strategy": "steps",
|
||||
"save_total_limit": 1,
|
||||
"seed": 100,
|
||||
"split_validation_test": true,
|
||||
"task_name": "mrpc",
|
||||
"test_dataset_config_name": [
|
||||
"en"
|
||||
],
|
||||
"test_dataset_name": "mrpc",
|
||||
"tokenizer_name": "t5-base",
|
||||
"warmup_steps": 500
|
||||
}
|
|
@ -0,0 +1,42 @@
|
|||
{
|
||||
"batch_size": 32,
|
||||
"dataset_config_name": [
|
||||
"en"
|
||||
],
|
||||
"delta_type": "bitfit",
|
||||
"do_eval": true,
|
||||
"do_test": true,
|
||||
"do_train": true,
|
||||
"eval_dataset_config_name": [
|
||||
"en"
|
||||
],
|
||||
"eval_dataset_name": "mrpc",
|
||||
"eval_steps": 200,
|
||||
"evaluation_strategy": "steps",
|
||||
"gradient_accumulation_steps": 1,
|
||||
"greater_is_better": true,
|
||||
"learning_rate": 0.0017750209757755706,
|
||||
"load_best_model_at_end": true,
|
||||
"max_source_length": 128,
|
||||
"max_steps": 5000,
|
||||
"metric_for_best_model": "average_metrics",
|
||||
"model_name_or_path": "t5-base",
|
||||
"output_dir": "outputs_search/bitfit.mrpc.t5-base/1107862",
|
||||
"overwrite_output_dir": true,
|
||||
"per_device_eval_batch_size": 32,
|
||||
"per_device_train_batch_size": 32,
|
||||
"predict_with_generate": true,
|
||||
"push_to_hub": false,
|
||||
"save_steps": 200,
|
||||
"save_strategy": "steps",
|
||||
"save_total_limit": 1,
|
||||
"seed": 100,
|
||||
"split_validation_test": true,
|
||||
"task_name": "mrpc",
|
||||
"test_dataset_config_name": [
|
||||
"en"
|
||||
],
|
||||
"test_dataset_name": "mrpc",
|
||||
"tokenizer_name": "t5-base",
|
||||
"warmup_steps": 0
|
||||
}
|
|
@ -0,0 +1,42 @@
|
|||
{
|
||||
"batch_size": 16,
|
||||
"dataset_config_name": [
|
||||
"en"
|
||||
],
|
||||
"delta_type": "bitfit",
|
||||
"do_eval": true,
|
||||
"do_test": true,
|
||||
"do_train": true,
|
||||
"eval_dataset_config_name": [
|
||||
"en"
|
||||
],
|
||||
"eval_dataset_name": "mrpc",
|
||||
"eval_steps": 200,
|
||||
"evaluation_strategy": "steps",
|
||||
"gradient_accumulation_steps": 1,
|
||||
"greater_is_better": true,
|
||||
"learning_rate": 8.499916262600587e-05,
|
||||
"load_best_model_at_end": true,
|
||||
"max_source_length": 128,
|
||||
"max_steps": 5000,
|
||||
"metric_for_best_model": "average_metrics",
|
||||
"model_name_or_path": "t5-base",
|
||||
"output_dir": "outputs_search/bitfit.mrpc.t5-base/15328099",
|
||||
"overwrite_output_dir": true,
|
||||
"per_device_eval_batch_size": 16,
|
||||
"per_device_train_batch_size": 16,
|
||||
"predict_with_generate": true,
|
||||
"push_to_hub": false,
|
||||
"save_steps": 200,
|
||||
"save_strategy": "steps",
|
||||
"save_total_limit": 1,
|
||||
"seed": 100,
|
||||
"split_validation_test": true,
|
||||
"task_name": "mrpc",
|
||||
"test_dataset_config_name": [
|
||||
"en"
|
||||
],
|
||||
"test_dataset_name": "mrpc",
|
||||
"tokenizer_name": "t5-base",
|
||||
"warmup_steps": 500
|
||||
}
|
|
@ -0,0 +1,42 @@
|
|||
{
|
||||
"batch_size": 32,
|
||||
"dataset_config_name": [
|
||||
"en"
|
||||
],
|
||||
"delta_type": "bitfit",
|
||||
"do_eval": true,
|
||||
"do_test": true,
|
||||
"do_train": true,
|
||||
"eval_dataset_config_name": [
|
||||
"en"
|
||||
],
|
||||
"eval_dataset_name": "mrpc",
|
||||
"eval_steps": 200,
|
||||
"evaluation_strategy": "steps",
|
||||
"gradient_accumulation_steps": 1,
|
||||
"greater_is_better": true,
|
||||
"learning_rate": 0.0006091646696452159,
|
||||
"load_best_model_at_end": true,
|
||||
"max_source_length": 128,
|
||||
"max_steps": 5000,
|
||||
"metric_for_best_model": "average_metrics",
|
||||
"model_name_or_path": "t5-base",
|
||||
"output_dir": "outputs_search/bitfit.mrpc.t5-base/15991793",
|
||||
"overwrite_output_dir": true,
|
||||
"per_device_eval_batch_size": 32,
|
||||
"per_device_train_batch_size": 32,
|
||||
"predict_with_generate": true,
|
||||
"push_to_hub": false,
|
||||
"save_steps": 200,
|
||||
"save_strategy": "steps",
|
||||
"save_total_limit": 1,
|
||||
"seed": 100,
|
||||
"split_validation_test": true,
|
||||
"task_name": "mrpc",
|
||||
"test_dataset_config_name": [
|
||||
"en"
|
||||
],
|
||||
"test_dataset_name": "mrpc",
|
||||
"tokenizer_name": "t5-base",
|
||||
"warmup_steps": 0
|
||||
}
|
|
@ -0,0 +1,42 @@
|
|||
{
|
||||
"batch_size": 16,
|
||||
"dataset_config_name": [
|
||||
"en"
|
||||
],
|
||||
"delta_type": "bitfit",
|
||||
"do_eval": true,
|
||||
"do_test": true,
|
||||
"do_train": true,
|
||||
"eval_dataset_config_name": [
|
||||
"en"
|
||||
],
|
||||
"eval_dataset_name": "mrpc",
|
||||
"eval_steps": 200,
|
||||
"evaluation_strategy": "steps",
|
||||
"gradient_accumulation_steps": 1,
|
||||
"greater_is_better": true,
|
||||
"learning_rate": 0.020109951371648067,
|
||||
"load_best_model_at_end": true,
|
||||
"max_source_length": 128,
|
||||
"max_steps": 5000,
|
||||
"metric_for_best_model": "average_metrics",
|
||||
"model_name_or_path": "t5-base",
|
||||
"output_dir": "outputs_search/bitfit.mrpc.t5-base/19489534",
|
||||
"overwrite_output_dir": true,
|
||||
"per_device_eval_batch_size": 16,
|
||||
"per_device_train_batch_size": 16,
|
||||
"predict_with_generate": true,
|
||||
"push_to_hub": false,
|
||||
"save_steps": 200,
|
||||
"save_strategy": "steps",
|
||||
"save_total_limit": 1,
|
||||
"seed": 100,
|
||||
"split_validation_test": true,
|
||||
"task_name": "mrpc",
|
||||
"test_dataset_config_name": [
|
||||
"en"
|
||||
],
|
||||
"test_dataset_name": "mrpc",
|
||||
"tokenizer_name": "t5-base",
|
||||
"warmup_steps": 500
|
||||
}
|
|
@ -0,0 +1,42 @@
|
|||
{
|
||||
"batch_size": 32,
|
||||
"dataset_config_name": [
|
||||
"en"
|
||||
],
|
||||
"delta_type": "bitfit",
|
||||
"do_eval": true,
|
||||
"do_test": true,
|
||||
"do_train": true,
|
||||
"eval_dataset_config_name": [
|
||||
"en"
|
||||
],
|
||||
"eval_dataset_name": "mrpc",
|
||||
"eval_steps": 200,
|
||||
"evaluation_strategy": "steps",
|
||||
"gradient_accumulation_steps": 1,
|
||||
"greater_is_better": true,
|
||||
"learning_rate": 0.005159882530578781,
|
||||
"load_best_model_at_end": true,
|
||||
"max_source_length": 128,
|
||||
"max_steps": 5000,
|
||||
"metric_for_best_model": "average_metrics",
|
||||
"model_name_or_path": "t5-base",
|
||||
"output_dir": "outputs_search/bitfit.mrpc.t5-base/2281342",
|
||||
"overwrite_output_dir": true,
|
||||
"per_device_eval_batch_size": 32,
|
||||
"per_device_train_batch_size": 32,
|
||||
"predict_with_generate": true,
|
||||
"push_to_hub": false,
|
||||
"save_steps": 200,
|
||||
"save_strategy": "steps",
|
||||
"save_total_limit": 1,
|
||||
"seed": 100,
|
||||
"split_validation_test": true,
|
||||
"task_name": "mrpc",
|
||||
"test_dataset_config_name": [
|
||||
"en"
|
||||
],
|
||||
"test_dataset_name": "mrpc",
|
||||
"tokenizer_name": "t5-base",
|
||||
"warmup_steps": 0
|
||||
}
|
|
@ -0,0 +1,42 @@
|
|||
{
|
||||
"batch_size": 64,
|
||||
"dataset_config_name": [
|
||||
"en"
|
||||
],
|
||||
"delta_type": "bitfit",
|
||||
"do_eval": true,
|
||||
"do_test": true,
|
||||
"do_train": true,
|
||||
"eval_dataset_config_name": [
|
||||
"en"
|
||||
],
|
||||
"eval_dataset_name": "mrpc",
|
||||
"eval_steps": 200,
|
||||
"evaluation_strategy": "steps",
|
||||
"gradient_accumulation_steps": 2,
|
||||
"greater_is_better": true,
|
||||
"learning_rate": 0.006869610954981632,
|
||||
"load_best_model_at_end": true,
|
||||
"max_source_length": 128,
|
||||
"max_steps": 5000,
|
||||
"metric_for_best_model": "average_metrics",
|
||||
"model_name_or_path": "t5-base",
|
||||
"output_dir": "outputs_search/bitfit.mrpc.t5-base/26349674",
|
||||
"overwrite_output_dir": true,
|
||||
"per_device_eval_batch_size": 64,
|
||||
"per_device_train_batch_size": 64,
|
||||
"predict_with_generate": true,
|
||||
"push_to_hub": false,
|
||||
"save_steps": 200,
|
||||
"save_strategy": "steps",
|
||||
"save_total_limit": 1,
|
||||
"seed": 100,
|
||||
"split_validation_test": true,
|
||||
"task_name": "mrpc",
|
||||
"test_dataset_config_name": [
|
||||
"en"
|
||||
],
|
||||
"test_dataset_name": "mrpc",
|
||||
"tokenizer_name": "t5-base",
|
||||
"warmup_steps": 500
|
||||
}
|
|
@ -0,0 +1,42 @@
|
|||
{
|
||||
"batch_size": 32,
|
||||
"dataset_config_name": [
|
||||
"en"
|
||||
],
|
||||
"delta_type": "bitfit",
|
||||
"do_eval": true,
|
||||
"do_test": true,
|
||||
"do_train": true,
|
||||
"eval_dataset_config_name": [
|
||||
"en"
|
||||
],
|
||||
"eval_dataset_name": "mrpc",
|
||||
"eval_steps": 200,
|
||||
"evaluation_strategy": "steps",
|
||||
"gradient_accumulation_steps": 1,
|
||||
"greater_is_better": true,
|
||||
"learning_rate": 0.0002723799659564822,
|
||||
"load_best_model_at_end": true,
|
||||
"max_source_length": 128,
|
||||
"max_steps": 5000,
|
||||
"metric_for_best_model": "average_metrics",
|
||||
"model_name_or_path": "t5-base",
|
||||
"output_dir": "outputs_search/bitfit.mrpc.t5-base/28219263",
|
||||
"overwrite_output_dir": true,
|
||||
"per_device_eval_batch_size": 32,
|
||||
"per_device_train_batch_size": 32,
|
||||
"predict_with_generate": true,
|
||||
"push_to_hub": false,
|
||||
"save_steps": 200,
|
||||
"save_strategy": "steps",
|
||||
"save_total_limit": 1,
|
||||
"seed": 100,
|
||||
"split_validation_test": true,
|
||||
"task_name": "mrpc",
|
||||
"test_dataset_config_name": [
|
||||
"en"
|
||||
],
|
||||
"test_dataset_name": "mrpc",
|
||||
"tokenizer_name": "t5-base",
|
||||
"warmup_steps": 0
|
||||
}
|
|
@ -0,0 +1,42 @@
|
|||
{
|
||||
"batch_size": 64,
|
||||
"dataset_config_name": [
|
||||
"en"
|
||||
],
|
||||
"delta_type": "bitfit",
|
||||
"do_eval": true,
|
||||
"do_test": true,
|
||||
"do_train": true,
|
||||
"eval_dataset_config_name": [
|
||||
"en"
|
||||
],
|
||||
"eval_dataset_name": "mrpc",
|
||||
"eval_steps": 200,
|
||||
"evaluation_strategy": "steps",
|
||||
"gradient_accumulation_steps": 1,
|
||||
"greater_is_better": true,
|
||||
"learning_rate": 0.0018605158382269157,
|
||||
"load_best_model_at_end": true,
|
||||
"max_source_length": 128,
|
||||
"max_steps": 5000,
|
||||
"metric_for_best_model": "average_metrics",
|
||||
"model_name_or_path": "t5-base",
|
||||
"output_dir": "outputs_search/bitfit.mrpc.t5-base/28244173",
|
||||
"overwrite_output_dir": true,
|
||||
"per_device_eval_batch_size": 64,
|
||||
"per_device_train_batch_size": 64,
|
||||
"predict_with_generate": true,
|
||||
"push_to_hub": false,
|
||||
"save_steps": 200,
|
||||
"save_strategy": "steps",
|
||||
"save_total_limit": 1,
|
||||
"seed": 100,
|
||||
"split_validation_test": true,
|
||||
"task_name": "mrpc",
|
||||
"test_dataset_config_name": [
|
||||
"en"
|
||||
],
|
||||
"test_dataset_name": "mrpc",
|
||||
"tokenizer_name": "t5-base",
|
||||
"warmup_steps": 0
|
||||
}
|
|
@ -0,0 +1,42 @@
|
|||
{
|
||||
"batch_size": 64,
|
||||
"dataset_config_name": [
|
||||
"en"
|
||||
],
|
||||
"delta_type": "bitfit",
|
||||
"do_eval": true,
|
||||
"do_test": true,
|
||||
"do_train": true,
|
||||
"eval_dataset_config_name": [
|
||||
"en"
|
||||
],
|
||||
"eval_dataset_name": "mrpc",
|
||||
"eval_steps": 200,
|
||||
"evaluation_strategy": "steps",
|
||||
"gradient_accumulation_steps": 2,
|
||||
"greater_is_better": true,
|
||||
"learning_rate": 0.0001248231069039661,
|
||||
"load_best_model_at_end": true,
|
||||
"max_source_length": 128,
|
||||
"max_steps": 5000,
|
||||
"metric_for_best_model": "average_metrics",
|
||||
"model_name_or_path": "t5-base",
|
||||
"output_dir": "outputs_search/bitfit.mrpc.t5-base/28313708",
|
||||
"overwrite_output_dir": true,
|
||||
"per_device_eval_batch_size": 64,
|
||||
"per_device_train_batch_size": 64,
|
||||
"predict_with_generate": true,
|
||||
"push_to_hub": false,
|
||||
"save_steps": 200,
|
||||
"save_strategy": "steps",
|
||||
"save_total_limit": 1,
|
||||
"seed": 100,
|
||||
"split_validation_test": true,
|
||||
"task_name": "mrpc",
|
||||
"test_dataset_config_name": [
|
||||
"en"
|
||||
],
|
||||
"test_dataset_name": "mrpc",
|
||||
"tokenizer_name": "t5-base",
|
||||
"warmup_steps": 500
|
||||
}
|
|
@ -0,0 +1,42 @@
|
|||
{
|
||||
"batch_size": 64,
|
||||
"dataset_config_name": [
|
||||
"en"
|
||||
],
|
||||
"delta_type": "bitfit",
|
||||
"do_eval": true,
|
||||
"do_test": true,
|
||||
"do_train": true,
|
||||
"eval_dataset_config_name": [
|
||||
"en"
|
||||
],
|
||||
"eval_dataset_name": "mrpc",
|
||||
"eval_steps": 200,
|
||||
"evaluation_strategy": "steps",
|
||||
"gradient_accumulation_steps": 2,
|
||||
"greater_is_better": true,
|
||||
"learning_rate": 0.0009490000624893097,
|
||||
"load_best_model_at_end": true,
|
||||
"max_source_length": 128,
|
||||
"max_steps": 5000,
|
||||
"metric_for_best_model": "average_metrics",
|
||||
"model_name_or_path": "t5-base",
|
||||
"output_dir": "outputs_search/bitfit.mrpc.t5-base/28844651",
|
||||
"overwrite_output_dir": true,
|
||||
"per_device_eval_batch_size": 64,
|
||||
"per_device_train_batch_size": 64,
|
||||
"predict_with_generate": true,
|
||||
"push_to_hub": false,
|
||||
"save_steps": 200,
|
||||
"save_strategy": "steps",
|
||||
"save_total_limit": 1,
|
||||
"seed": 100,
|
||||
"split_validation_test": true,
|
||||
"task_name": "mrpc",
|
||||
"test_dataset_config_name": [
|
||||
"en"
|
||||
],
|
||||
"test_dataset_name": "mrpc",
|
||||
"tokenizer_name": "t5-base",
|
||||
"warmup_steps": 500
|
||||
}
|
|
@ -0,0 +1,42 @@
|
|||
{
|
||||
"batch_size": 64,
|
||||
"dataset_config_name": [
|
||||
"en"
|
||||
],
|
||||
"delta_type": "bitfit",
|
||||
"do_eval": true,
|
||||
"do_test": true,
|
||||
"do_train": true,
|
||||
"eval_dataset_config_name": [
|
||||
"en"
|
||||
],
|
||||
"eval_dataset_name": "mrpc",
|
||||
"eval_steps": 200,
|
||||
"evaluation_strategy": "steps",
|
||||
"gradient_accumulation_steps": 1,
|
||||
"greater_is_better": true,
|
||||
"learning_rate": 3.5602209401278214e-05,
|
||||
"load_best_model_at_end": true,
|
||||
"max_source_length": 128,
|
||||
"max_steps": 5000,
|
||||
"metric_for_best_model": "average_metrics",
|
||||
"model_name_or_path": "t5-base",
|
||||
"output_dir": "outputs_search/bitfit.mrpc.t5-base/28881946",
|
||||
"overwrite_output_dir": true,
|
||||
"per_device_eval_batch_size": 64,
|
||||
"per_device_train_batch_size": 64,
|
||||
"predict_with_generate": true,
|
||||
"push_to_hub": false,
|
||||
"save_steps": 200,
|
||||
"save_strategy": "steps",
|
||||
"save_total_limit": 1,
|
||||
"seed": 100,
|
||||
"split_validation_test": true,
|
||||
"task_name": "mrpc",
|
||||
"test_dataset_config_name": [
|
||||
"en"
|
||||
],
|
||||
"test_dataset_name": "mrpc",
|
||||
"tokenizer_name": "t5-base",
|
||||
"warmup_steps": 0
|
||||
}
|
|
@ -0,0 +1,42 @@
|
|||
{
|
||||
"batch_size": 32,
|
||||
"dataset_config_name": [
|
||||
"en"
|
||||
],
|
||||
"delta_type": "bitfit",
|
||||
"do_eval": true,
|
||||
"do_test": true,
|
||||
"do_train": true,
|
||||
"eval_dataset_config_name": [
|
||||
"en"
|
||||
],
|
||||
"eval_dataset_name": "mrpc",
|
||||
"eval_steps": 200,
|
||||
"evaluation_strategy": "steps",
|
||||
"gradient_accumulation_steps": 1,
|
||||
"greater_is_better": true,
|
||||
"learning_rate": 0.004220683008677483,
|
||||
"load_best_model_at_end": true,
|
||||
"max_source_length": 128,
|
||||
"max_steps": 5000,
|
||||
"metric_for_best_model": "average_metrics",
|
||||
"model_name_or_path": "t5-base",
|
||||
"output_dir": "outputs_search/bitfit.mrpc.t5-base/29695566",
|
||||
"overwrite_output_dir": true,
|
||||
"per_device_eval_batch_size": 32,
|
||||
"per_device_train_batch_size": 32,
|
||||
"predict_with_generate": true,
|
||||
"push_to_hub": false,
|
||||
"save_steps": 200,
|
||||
"save_strategy": "steps",
|
||||
"save_total_limit": 1,
|
||||
"seed": 100,
|
||||
"split_validation_test": true,
|
||||
"task_name": "mrpc",
|
||||
"test_dataset_config_name": [
|
||||
"en"
|
||||
],
|
||||
"test_dataset_name": "mrpc",
|
||||
"tokenizer_name": "t5-base",
|
||||
"warmup_steps": 0
|
||||
}
|
|
@ -0,0 +1,42 @@
|
|||
{
|
||||
"batch_size": 32,
|
||||
"dataset_config_name": [
|
||||
"en"
|
||||
],
|
||||
"delta_type": "bitfit",
|
||||
"do_eval": true,
|
||||
"do_test": true,
|
||||
"do_train": true,
|
||||
"eval_dataset_config_name": [
|
||||
"en"
|
||||
],
|
||||
"eval_dataset_name": "mrpc",
|
||||
"eval_steps": 200,
|
||||
"evaluation_strategy": "steps",
|
||||
"gradient_accumulation_steps": 1,
|
||||
"greater_is_better": true,
|
||||
"learning_rate": 0.004159184883370181,
|
||||
"load_best_model_at_end": true,
|
||||
"max_source_length": 128,
|
||||
"max_steps": 5000,
|
||||
"metric_for_best_model": "average_metrics",
|
||||
"model_name_or_path": "t5-base",
|
||||
"output_dir": "outputs_search/bitfit.mrpc.t5-base/304080",
|
||||
"overwrite_output_dir": true,
|
||||
"per_device_eval_batch_size": 32,
|
||||
"per_device_train_batch_size": 32,
|
||||
"predict_with_generate": true,
|
||||
"push_to_hub": false,
|
||||
"save_steps": 200,
|
||||
"save_strategy": "steps",
|
||||
"save_total_limit": 1,
|
||||
"seed": 100,
|
||||
"split_validation_test": true,
|
||||
"task_name": "mrpc",
|
||||
"test_dataset_config_name": [
|
||||
"en"
|
||||
],
|
||||
"test_dataset_name": "mrpc",
|
||||
"tokenizer_name": "t5-base",
|
||||
"warmup_steps": 0
|
||||
}
|
|
@ -0,0 +1,42 @@
|
|||
{
|
||||
"batch_size": 64,
|
||||
"dataset_config_name": [
|
||||
"en"
|
||||
],
|
||||
"delta_type": "bitfit",
|
||||
"do_eval": true,
|
||||
"do_test": true,
|
||||
"do_train": true,
|
||||
"eval_dataset_config_name": [
|
||||
"en"
|
||||
],
|
||||
"eval_dataset_name": "mrpc",
|
||||
"eval_steps": 200,
|
||||
"evaluation_strategy": "steps",
|
||||
"gradient_accumulation_steps": 2,
|
||||
"greater_is_better": true,
|
||||
"learning_rate": 0.0009353172054773991,
|
||||
"load_best_model_at_end": true,
|
||||
"max_source_length": 128,
|
||||
"max_steps": 5000,
|
||||
"metric_for_best_model": "average_metrics",
|
||||
"model_name_or_path": "t5-base",
|
||||
"output_dir": "outputs_search/bitfit.mrpc.t5-base/33594301",
|
||||
"overwrite_output_dir": true,
|
||||
"per_device_eval_batch_size": 64,
|
||||
"per_device_train_batch_size": 64,
|
||||
"predict_with_generate": true,
|
||||
"push_to_hub": false,
|
||||
"save_steps": 200,
|
||||
"save_strategy": "steps",
|
||||
"save_total_limit": 1,
|
||||
"seed": 100,
|
||||
"split_validation_test": true,
|
||||
"task_name": "mrpc",
|
||||
"test_dataset_config_name": [
|
||||
"en"
|
||||
],
|
||||
"test_dataset_name": "mrpc",
|
||||
"tokenizer_name": "t5-base",
|
||||
"warmup_steps": 0
|
||||
}
|
|
@ -0,0 +1,42 @@
|
|||
{
|
||||
"batch_size": 32,
|
||||
"dataset_config_name": [
|
||||
"en"
|
||||
],
|
||||
"delta_type": "bitfit",
|
||||
"do_eval": true,
|
||||
"do_test": true,
|
||||
"do_train": true,
|
||||
"eval_dataset_config_name": [
|
||||
"en"
|
||||
],
|
||||
"eval_dataset_name": "mrpc",
|
||||
"eval_steps": 200,
|
||||
"evaluation_strategy": "steps",
|
||||
"gradient_accumulation_steps": 1,
|
||||
"greater_is_better": true,
|
||||
"learning_rate": 0.0037650265946582574,
|
||||
"load_best_model_at_end": true,
|
||||
"max_source_length": 128,
|
||||
"max_steps": 5000,
|
||||
"metric_for_best_model": "average_metrics",
|
||||
"model_name_or_path": "t5-base",
|
||||
"output_dir": "outputs_search/bitfit.mrpc.t5-base/37208828",
|
||||
"overwrite_output_dir": true,
|
||||
"per_device_eval_batch_size": 32,
|
||||
"per_device_train_batch_size": 32,
|
||||
"predict_with_generate": true,
|
||||
"push_to_hub": false,
|
||||
"save_steps": 200,
|
||||
"save_strategy": "steps",
|
||||
"save_total_limit": 1,
|
||||
"seed": 100,
|
||||
"split_validation_test": true,
|
||||
"task_name": "mrpc",
|
||||
"test_dataset_config_name": [
|
||||
"en"
|
||||
],
|
||||
"test_dataset_name": "mrpc",
|
||||
"tokenizer_name": "t5-base",
|
||||
"warmup_steps": 0
|
||||
}
|
|
@ -0,0 +1,42 @@
|
|||
{
|
||||
"batch_size": 16,
|
||||
"dataset_config_name": [
|
||||
"en"
|
||||
],
|
||||
"delta_type": "bitfit",
|
||||
"do_eval": true,
|
||||
"do_test": true,
|
||||
"do_train": true,
|
||||
"eval_dataset_config_name": [
|
||||
"en"
|
||||
],
|
||||
"eval_dataset_name": "mrpc",
|
||||
"eval_steps": 200,
|
||||
"evaluation_strategy": "steps",
|
||||
"gradient_accumulation_steps": 1,
|
||||
"greater_is_better": true,
|
||||
"learning_rate": 6.867655291394631e-05,
|
||||
"load_best_model_at_end": true,
|
||||
"max_source_length": 128,
|
||||
"max_steps": 5000,
|
||||
"metric_for_best_model": "average_metrics",
|
||||
"model_name_or_path": "t5-base",
|
||||
"output_dir": "outputs_search/bitfit.mrpc.t5-base/38351436",
|
||||
"overwrite_output_dir": true,
|
||||
"per_device_eval_batch_size": 16,
|
||||
"per_device_train_batch_size": 16,
|
||||
"predict_with_generate": true,
|
||||
"push_to_hub": false,
|
||||
"save_steps": 200,
|
||||
"save_strategy": "steps",
|
||||
"save_total_limit": 1,
|
||||
"seed": 100,
|
||||
"split_validation_test": true,
|
||||
"task_name": "mrpc",
|
||||
"test_dataset_config_name": [
|
||||
"en"
|
||||
],
|
||||
"test_dataset_name": "mrpc",
|
||||
"tokenizer_name": "t5-base",
|
||||
"warmup_steps": 500
|
||||
}
|
|
@ -0,0 +1,42 @@
|
|||
{
|
||||
"batch_size": 64,
|
||||
"dataset_config_name": [
|
||||
"en"
|
||||
],
|
||||
"delta_type": "bitfit",
|
||||
"do_eval": true,
|
||||
"do_test": true,
|
||||
"do_train": true,
|
||||
"eval_dataset_config_name": [
|
||||
"en"
|
||||
],
|
||||
"eval_dataset_name": "mrpc",
|
||||
"eval_steps": 200,
|
||||
"evaluation_strategy": "steps",
|
||||
"gradient_accumulation_steps": 2,
|
||||
"greater_is_better": true,
|
||||
"learning_rate": 0.0022951686429675895,
|
||||
"load_best_model_at_end": true,
|
||||
"max_source_length": 128,
|
||||
"max_steps": 5000,
|
||||
"metric_for_best_model": "average_metrics",
|
||||
"model_name_or_path": "t5-base",
|
||||
"output_dir": "outputs_search/bitfit.mrpc.t5-base/42338278",
|
||||
"overwrite_output_dir": true,
|
||||
"per_device_eval_batch_size": 64,
|
||||
"per_device_train_batch_size": 64,
|
||||
"predict_with_generate": true,
|
||||
"push_to_hub": false,
|
||||
"save_steps": 200,
|
||||
"save_strategy": "steps",
|
||||
"save_total_limit": 1,
|
||||
"seed": 100,
|
||||
"split_validation_test": true,
|
||||
"task_name": "mrpc",
|
||||
"test_dataset_config_name": [
|
||||
"en"
|
||||
],
|
||||
"test_dataset_name": "mrpc",
|
||||
"tokenizer_name": "t5-base",
|
||||
"warmup_steps": 500
|
||||
}
|
|
@ -0,0 +1,42 @@
|
|||
{
|
||||
"batch_size": 64,
|
||||
"dataset_config_name": [
|
||||
"en"
|
||||
],
|
||||
"delta_type": "bitfit",
|
||||
"do_eval": true,
|
||||
"do_test": true,
|
||||
"do_train": true,
|
||||
"eval_dataset_config_name": [
|
||||
"en"
|
||||
],
|
||||
"eval_dataset_name": "mrpc",
|
||||
"eval_steps": 200,
|
||||
"evaluation_strategy": "steps",
|
||||
"gradient_accumulation_steps": 2,
|
||||
"greater_is_better": true,
|
||||
"learning_rate": 0.0011474682877585407,
|
||||
"load_best_model_at_end": true,
|
||||
"max_source_length": 128,
|
||||
"max_steps": 5000,
|
||||
"metric_for_best_model": "average_metrics",
|
||||
"model_name_or_path": "t5-base",
|
||||
"output_dir": "outputs_search/bitfit.mrpc.t5-base/43419391",
|
||||
"overwrite_output_dir": true,
|
||||
"per_device_eval_batch_size": 64,
|
||||
"per_device_train_batch_size": 64,
|
||||
"predict_with_generate": true,
|
||||
"push_to_hub": false,
|
||||
"save_steps": 200,
|
||||
"save_strategy": "steps",
|
||||
"save_total_limit": 1,
|
||||
"seed": 100,
|
||||
"split_validation_test": true,
|
||||
"task_name": "mrpc",
|
||||
"test_dataset_config_name": [
|
||||
"en"
|
||||
],
|
||||
"test_dataset_name": "mrpc",
|
||||
"tokenizer_name": "t5-base",
|
||||
"warmup_steps": 500
|
||||
}
|
|
@ -0,0 +1,42 @@
|
|||
{
|
||||
"batch_size": 64,
|
||||
"dataset_config_name": [
|
||||
"en"
|
||||
],
|
||||
"delta_type": "bitfit",
|
||||
"do_eval": true,
|
||||
"do_test": true,
|
||||
"do_train": true,
|
||||
"eval_dataset_config_name": [
|
||||
"en"
|
||||
],
|
||||
"eval_dataset_name": "mrpc",
|
||||
"eval_steps": 200,
|
||||
"evaluation_strategy": "steps",
|
||||
"gradient_accumulation_steps": 1,
|
||||
"greater_is_better": true,
|
||||
"learning_rate": 0.009965694572181888,
|
||||
"load_best_model_at_end": true,
|
||||
"max_source_length": 128,
|
||||
"max_steps": 5000,
|
||||
"metric_for_best_model": "average_metrics",
|
||||
"model_name_or_path": "t5-base",
|
||||
"output_dir": "outputs_search/bitfit.mrpc.t5-base/45030088",
|
||||
"overwrite_output_dir": true,
|
||||
"per_device_eval_batch_size": 64,
|
||||
"per_device_train_batch_size": 64,
|
||||
"predict_with_generate": true,
|
||||
"push_to_hub": false,
|
||||
"save_steps": 200,
|
||||
"save_strategy": "steps",
|
||||
"save_total_limit": 1,
|
||||
"seed": 100,
|
||||
"split_validation_test": true,
|
||||
"task_name": "mrpc",
|
||||
"test_dataset_config_name": [
|
||||
"en"
|
||||
],
|
||||
"test_dataset_name": "mrpc",
|
||||
"tokenizer_name": "t5-base",
|
||||
"warmup_steps": 500
|
||||
}
|
Binary file not shown.
Binary file not shown.
|
@ -0,0 +1,42 @@
|
|||
{
|
||||
"batch_size": 64,
|
||||
"dataset_config_name": [
|
||||
"en"
|
||||
],
|
||||
"delta_type": "bitfit",
|
||||
"do_eval": true,
|
||||
"do_test": true,
|
||||
"do_train": true,
|
||||
"eval_dataset_config_name": [
|
||||
"en"
|
||||
],
|
||||
"eval_dataset_name": "mrpc",
|
||||
"eval_steps": 200,
|
||||
"evaluation_strategy": "steps",
|
||||
"gradient_accumulation_steps": 1,
|
||||
"greater_is_better": true,
|
||||
"learning_rate": 0.0020236592832077785,
|
||||
"load_best_model_at_end": true,
|
||||
"max_source_length": 128,
|
||||
"max_steps": 5000,
|
||||
"metric_for_best_model": "average_metrics",
|
||||
"model_name_or_path": "t5-base",
|
||||
"output_dir": "outputs_search/bitfit.mrpc.t5-base/50851153",
|
||||
"overwrite_output_dir": true,
|
||||
"per_device_eval_batch_size": 64,
|
||||
"per_device_train_batch_size": 64,
|
||||
"predict_with_generate": true,
|
||||
"push_to_hub": false,
|
||||
"save_steps": 200,
|
||||
"save_strategy": "steps",
|
||||
"save_total_limit": 1,
|
||||
"seed": 100,
|
||||
"split_validation_test": true,
|
||||
"task_name": "mrpc",
|
||||
"test_dataset_config_name": [
|
||||
"en"
|
||||
],
|
||||
"test_dataset_name": "mrpc",
|
||||
"tokenizer_name": "t5-base",
|
||||
"warmup_steps": 0
|
||||
}
|
|
@ -0,0 +1 @@
|
|||
{"batch_size": 64, "dataset_config_name": ["en"], "delta_type": "bitfit", "do_eval": true, "do_test": true, "do_train": true, "eval_dataset_config_name": ["en"], "eval_dataset_name": "mrpc", "eval_steps": 200, "evaluation_strategy": "steps", "gradient_accumulation_steps": 1, "greater_is_better": true, "learning_rate": 0.0020236592832077785, "load_best_model_at_end": true, "max_source_length": 128, "max_steps": 5000, "metric_for_best_model": "average_metrics", "model_name_or_path": "t5-base", "output_dir": "outputs_search/bitfit.mrpc.t5-base/50851153", "overwrite_output_dir": true, "per_device_eval_batch_size": 64, "per_device_train_batch_size": 64, "predict_with_generate": true, "push_to_hub": false, "save_steps": 200, "save_strategy": "steps", "save_total_limit": 1, "seed": 100, "split_validation_test": true, "task_name": "mrpc", "test_dataset_config_name": ["en"], "test_dataset_name": "mrpc", "tokenizer_name": "t5-base", "warmup_steps": 0}
|
|
@ -0,0 +1,42 @@
|
|||
{
|
||||
"batch_size": 64,
|
||||
"dataset_config_name": [
|
||||
"en"
|
||||
],
|
||||
"delta_type": "bitfit",
|
||||
"do_eval": true,
|
||||
"do_test": true,
|
||||
"do_train": true,
|
||||
"eval_dataset_config_name": [
|
||||
"en"
|
||||
],
|
||||
"eval_dataset_name": "mrpc",
|
||||
"eval_steps": 200,
|
||||
"evaluation_strategy": "steps",
|
||||
"gradient_accumulation_steps": 1,
|
||||
"greater_is_better": true,
|
||||
"learning_rate": 0.011098597581779427,
|
||||
"load_best_model_at_end": true,
|
||||
"max_source_length": 128,
|
||||
"max_steps": 5000,
|
||||
"metric_for_best_model": "average_metrics",
|
||||
"model_name_or_path": "t5-base",
|
||||
"output_dir": "outputs_search/bitfit.mrpc.t5-base/57783553",
|
||||
"overwrite_output_dir": true,
|
||||
"per_device_eval_batch_size": 64,
|
||||
"per_device_train_batch_size": 64,
|
||||
"predict_with_generate": true,
|
||||
"push_to_hub": false,
|
||||
"save_steps": 200,
|
||||
"save_strategy": "steps",
|
||||
"save_total_limit": 1,
|
||||
"seed": 100,
|
||||
"split_validation_test": true,
|
||||
"task_name": "mrpc",
|
||||
"test_dataset_config_name": [
|
||||
"en"
|
||||
],
|
||||
"test_dataset_name": "mrpc",
|
||||
"tokenizer_name": "t5-base",
|
||||
"warmup_steps": 0
|
||||
}
|
|
@ -0,0 +1,42 @@
|
|||
{
|
||||
"batch_size": 64,
|
||||
"dataset_config_name": [
|
||||
"en"
|
||||
],
|
||||
"delta_type": "bitfit",
|
||||
"do_eval": true,
|
||||
"do_test": true,
|
||||
"do_train": true,
|
||||
"eval_dataset_config_name": [
|
||||
"en"
|
||||
],
|
||||
"eval_dataset_name": "mrpc",
|
||||
"eval_steps": 200,
|
||||
"evaluation_strategy": "steps",
|
||||
"gradient_accumulation_steps": 2,
|
||||
"greater_is_better": true,
|
||||
"learning_rate": 0.0005414844782319124,
|
||||
"load_best_model_at_end": true,
|
||||
"max_source_length": 128,
|
||||
"max_steps": 5000,
|
||||
"metric_for_best_model": "average_metrics",
|
||||
"model_name_or_path": "t5-base",
|
||||
"output_dir": "outputs_search/bitfit.mrpc.t5-base/6060488",
|
||||
"overwrite_output_dir": true,
|
||||
"per_device_eval_batch_size": 64,
|
||||
"per_device_train_batch_size": 64,
|
||||
"predict_with_generate": true,
|
||||
"push_to_hub": false,
|
||||
"save_steps": 200,
|
||||
"save_strategy": "steps",
|
||||
"save_total_limit": 1,
|
||||
"seed": 100,
|
||||
"split_validation_test": true,
|
||||
"task_name": "mrpc",
|
||||
"test_dataset_config_name": [
|
||||
"en"
|
||||
],
|
||||
"test_dataset_name": "mrpc",
|
||||
"tokenizer_name": "t5-base",
|
||||
"warmup_steps": 500
|
||||
}
|
|
@ -0,0 +1,42 @@
|
|||
{
|
||||
"batch_size": 64,
|
||||
"dataset_config_name": [
|
||||
"en"
|
||||
],
|
||||
"delta_type": "bitfit",
|
||||
"do_eval": true,
|
||||
"do_test": true,
|
||||
"do_train": true,
|
||||
"eval_dataset_config_name": [
|
||||
"en"
|
||||
],
|
||||
"eval_dataset_name": "mrpc",
|
||||
"eval_steps": 200,
|
||||
"evaluation_strategy": "steps",
|
||||
"gradient_accumulation_steps": 2,
|
||||
"greater_is_better": true,
|
||||
"learning_rate": 0.016927560240899083,
|
||||
"load_best_model_at_end": true,
|
||||
"max_source_length": 128,
|
||||
"max_steps": 5000,
|
||||
"metric_for_best_model": "average_metrics",
|
||||
"model_name_or_path": "t5-base",
|
||||
"output_dir": "outputs_search/bitfit.mrpc.t5-base/61860753",
|
||||
"overwrite_output_dir": true,
|
||||
"per_device_eval_batch_size": 64,
|
||||
"per_device_train_batch_size": 64,
|
||||
"predict_with_generate": true,
|
||||
"push_to_hub": false,
|
||||
"save_steps": 200,
|
||||
"save_strategy": "steps",
|
||||
"save_total_limit": 1,
|
||||
"seed": 100,
|
||||
"split_validation_test": true,
|
||||
"task_name": "mrpc",
|
||||
"test_dataset_config_name": [
|
||||
"en"
|
||||
],
|
||||
"test_dataset_name": "mrpc",
|
||||
"tokenizer_name": "t5-base",
|
||||
"warmup_steps": 500
|
||||
}
|
|
@ -0,0 +1,42 @@
|
|||
{
|
||||
"batch_size": 64,
|
||||
"dataset_config_name": [
|
||||
"en"
|
||||
],
|
||||
"delta_type": "bitfit",
|
||||
"do_eval": true,
|
||||
"do_test": true,
|
||||
"do_train": true,
|
||||
"eval_dataset_config_name": [
|
||||
"en"
|
||||
],
|
||||
"eval_dataset_name": "mrpc",
|
||||
"eval_steps": 200,
|
||||
"evaluation_strategy": "steps",
|
||||
"gradient_accumulation_steps": 2,
|
||||
"greater_is_better": true,
|
||||
"learning_rate": 1.0141082015912518e-05,
|
||||
"load_best_model_at_end": true,
|
||||
"max_source_length": 128,
|
||||
"max_steps": 5000,
|
||||
"metric_for_best_model": "average_metrics",
|
||||
"model_name_or_path": "t5-base",
|
||||
"output_dir": "outputs_search/bitfit.mrpc.t5-base/63232091",
|
||||
"overwrite_output_dir": true,
|
||||
"per_device_eval_batch_size": 64,
|
||||
"per_device_train_batch_size": 64,
|
||||
"predict_with_generate": true,
|
||||
"push_to_hub": false,
|
||||
"save_steps": 200,
|
||||
"save_strategy": "steps",
|
||||
"save_total_limit": 1,
|
||||
"seed": 100,
|
||||
"split_validation_test": true,
|
||||
"task_name": "mrpc",
|
||||
"test_dataset_config_name": [
|
||||
"en"
|
||||
],
|
||||
"test_dataset_name": "mrpc",
|
||||
"tokenizer_name": "t5-base",
|
||||
"warmup_steps": 0
|
||||
}
|
|
@ -0,0 +1,42 @@
|
|||
{
|
||||
"batch_size": 64,
|
||||
"dataset_config_name": [
|
||||
"en"
|
||||
],
|
||||
"delta_type": "bitfit",
|
||||
"do_eval": true,
|
||||
"do_test": true,
|
||||
"do_train": true,
|
||||
"eval_dataset_config_name": [
|
||||
"en"
|
||||
],
|
||||
"eval_dataset_name": "mrpc",
|
||||
"eval_steps": 200,
|
||||
"evaluation_strategy": "steps",
|
||||
"gradient_accumulation_steps": 2,
|
||||
"greater_is_better": true,
|
||||
"learning_rate": 0.0018137027382556477,
|
||||
"load_best_model_at_end": true,
|
||||
"max_source_length": 128,
|
||||
"max_steps": 5000,
|
||||
"metric_for_best_model": "average_metrics",
|
||||
"model_name_or_path": "t5-base",
|
||||
"output_dir": "outputs_search/bitfit.mrpc.t5-base/6329472",
|
||||
"overwrite_output_dir": true,
|
||||
"per_device_eval_batch_size": 64,
|
||||
"per_device_train_batch_size": 64,
|
||||
"predict_with_generate": true,
|
||||
"push_to_hub": false,
|
||||
"save_steps": 200,
|
||||
"save_strategy": "steps",
|
||||
"save_total_limit": 1,
|
||||
"seed": 100,
|
||||
"split_validation_test": true,
|
||||
"task_name": "mrpc",
|
||||
"test_dataset_config_name": [
|
||||
"en"
|
||||
],
|
||||
"test_dataset_name": "mrpc",
|
||||
"tokenizer_name": "t5-base",
|
||||
"warmup_steps": 0
|
||||
}
|
|
@ -0,0 +1,42 @@
|
|||
{
|
||||
"batch_size": 64,
|
||||
"dataset_config_name": [
|
||||
"en"
|
||||
],
|
||||
"delta_type": "bitfit",
|
||||
"do_eval": true,
|
||||
"do_test": true,
|
||||
"do_train": true,
|
||||
"eval_dataset_config_name": [
|
||||
"en"
|
||||
],
|
||||
"eval_dataset_name": "mrpc",
|
||||
"eval_steps": 200,
|
||||
"evaluation_strategy": "steps",
|
||||
"gradient_accumulation_steps": 1,
|
||||
"greater_is_better": true,
|
||||
"learning_rate": 0.023938918670661075,
|
||||
"load_best_model_at_end": true,
|
||||
"max_source_length": 128,
|
||||
"max_steps": 5000,
|
||||
"metric_for_best_model": "average_metrics",
|
||||
"model_name_or_path": "t5-base",
|
||||
"output_dir": "outputs_search/bitfit.mrpc.t5-base/64753972",
|
||||
"overwrite_output_dir": true,
|
||||
"per_device_eval_batch_size": 64,
|
||||
"per_device_train_batch_size": 64,
|
||||
"predict_with_generate": true,
|
||||
"push_to_hub": false,
|
||||
"save_steps": 200,
|
||||
"save_strategy": "steps",
|
||||
"save_total_limit": 1,
|
||||
"seed": 100,
|
||||
"split_validation_test": true,
|
||||
"task_name": "mrpc",
|
||||
"test_dataset_config_name": [
|
||||
"en"
|
||||
],
|
||||
"test_dataset_name": "mrpc",
|
||||
"tokenizer_name": "t5-base",
|
||||
"warmup_steps": 500
|
||||
}
|
|
@ -0,0 +1,42 @@
|
|||
{
|
||||
"batch_size": 16,
|
||||
"dataset_config_name": [
|
||||
"en"
|
||||
],
|
||||
"delta_type": "bitfit",
|
||||
"do_eval": true,
|
||||
"do_test": true,
|
||||
"do_train": true,
|
||||
"eval_dataset_config_name": [
|
||||
"en"
|
||||
],
|
||||
"eval_dataset_name": "mrpc",
|
||||
"eval_steps": 200,
|
||||
"evaluation_strategy": "steps",
|
||||
"gradient_accumulation_steps": 1,
|
||||
"greater_is_better": true,
|
||||
"learning_rate": 0.08212873599011565,
|
||||
"load_best_model_at_end": true,
|
||||
"max_source_length": 128,
|
||||
"max_steps": 5000,
|
||||
"metric_for_best_model": "average_metrics",
|
||||
"model_name_or_path": "t5-base",
|
||||
"output_dir": "outputs_search/bitfit.mrpc.t5-base/65221118",
|
||||
"overwrite_output_dir": true,
|
||||
"per_device_eval_batch_size": 16,
|
||||
"per_device_train_batch_size": 16,
|
||||
"predict_with_generate": true,
|
||||
"push_to_hub": false,
|
||||
"save_steps": 200,
|
||||
"save_strategy": "steps",
|
||||
"save_total_limit": 1,
|
||||
"seed": 100,
|
||||
"split_validation_test": true,
|
||||
"task_name": "mrpc",
|
||||
"test_dataset_config_name": [
|
||||
"en"
|
||||
],
|
||||
"test_dataset_name": "mrpc",
|
||||
"tokenizer_name": "t5-base",
|
||||
"warmup_steps": 500
|
||||
}
|
|
@ -0,0 +1,42 @@
|
|||
{
|
||||
"batch_size": 64,
|
||||
"dataset_config_name": [
|
||||
"en"
|
||||
],
|
||||
"delta_type": "bitfit",
|
||||
"do_eval": true,
|
||||
"do_test": true,
|
||||
"do_train": true,
|
||||
"eval_dataset_config_name": [
|
||||
"en"
|
||||
],
|
||||
"eval_dataset_name": "mrpc",
|
||||
"eval_steps": 200,
|
||||
"evaluation_strategy": "steps",
|
||||
"gradient_accumulation_steps": 1,
|
||||
"greater_is_better": true,
|
||||
"learning_rate": 4.8538530604501934e-05,
|
||||
"load_best_model_at_end": true,
|
||||
"max_source_length": 128,
|
||||
"max_steps": 5000,
|
||||
"metric_for_best_model": "average_metrics",
|
||||
"model_name_or_path": "t5-base",
|
||||
"output_dir": "outputs_search/bitfit.mrpc.t5-base/66798551",
|
||||
"overwrite_output_dir": true,
|
||||
"per_device_eval_batch_size": 64,
|
||||
"per_device_train_batch_size": 64,
|
||||
"predict_with_generate": true,
|
||||
"push_to_hub": false,
|
||||
"save_steps": 200,
|
||||
"save_strategy": "steps",
|
||||
"save_total_limit": 1,
|
||||
"seed": 100,
|
||||
"split_validation_test": true,
|
||||
"task_name": "mrpc",
|
||||
"test_dataset_config_name": [
|
||||
"en"
|
||||
],
|
||||
"test_dataset_name": "mrpc",
|
||||
"tokenizer_name": "t5-base",
|
||||
"warmup_steps": 0
|
||||
}
|
|
@ -0,0 +1,42 @@
|
|||
{
|
||||
"batch_size": 32,
|
||||
"dataset_config_name": [
|
||||
"en"
|
||||
],
|
||||
"delta_type": "bitfit",
|
||||
"do_eval": true,
|
||||
"do_test": true,
|
||||
"do_train": true,
|
||||
"eval_dataset_config_name": [
|
||||
"en"
|
||||
],
|
||||
"eval_dataset_name": "mrpc",
|
||||
"eval_steps": 200,
|
||||
"evaluation_strategy": "steps",
|
||||
"gradient_accumulation_steps": 1,
|
||||
"greater_is_better": true,
|
||||
"learning_rate": 0.0056649657801790786,
|
||||
"load_best_model_at_end": true,
|
||||
"max_source_length": 128,
|
||||
"max_steps": 5000,
|
||||
"metric_for_best_model": "average_metrics",
|
||||
"model_name_or_path": "t5-base",
|
||||
"output_dir": "outputs_search/bitfit.mrpc.t5-base/67615376",
|
||||
"overwrite_output_dir": true,
|
||||
"per_device_eval_batch_size": 32,
|
||||
"per_device_train_batch_size": 32,
|
||||
"predict_with_generate": true,
|
||||
"push_to_hub": false,
|
||||
"save_steps": 200,
|
||||
"save_strategy": "steps",
|
||||
"save_total_limit": 1,
|
||||
"seed": 100,
|
||||
"split_validation_test": true,
|
||||
"task_name": "mrpc",
|
||||
"test_dataset_config_name": [
|
||||
"en"
|
||||
],
|
||||
"test_dataset_name": "mrpc",
|
||||
"tokenizer_name": "t5-base",
|
||||
"warmup_steps": 0
|
||||
}
|
|
@ -0,0 +1,42 @@
|
|||
{
|
||||
"batch_size": 64,
|
||||
"dataset_config_name": [
|
||||
"en"
|
||||
],
|
||||
"delta_type": "bitfit",
|
||||
"do_eval": true,
|
||||
"do_test": true,
|
||||
"do_train": true,
|
||||
"eval_dataset_config_name": [
|
||||
"en"
|
||||
],
|
||||
"eval_dataset_name": "mrpc",
|
||||
"eval_steps": 200,
|
||||
"evaluation_strategy": "steps",
|
||||
"gradient_accumulation_steps": 1,
|
||||
"greater_is_better": true,
|
||||
"learning_rate": 0.03495857107255486,
|
||||
"load_best_model_at_end": true,
|
||||
"max_source_length": 128,
|
||||
"max_steps": 5000,
|
||||
"metric_for_best_model": "average_metrics",
|
||||
"model_name_or_path": "t5-base",
|
||||
"output_dir": "outputs_search/bitfit.mrpc.t5-base/6773136",
|
||||
"overwrite_output_dir": true,
|
||||
"per_device_eval_batch_size": 64,
|
||||
"per_device_train_batch_size": 64,
|
||||
"predict_with_generate": true,
|
||||
"push_to_hub": false,
|
||||
"save_steps": 200,
|
||||
"save_strategy": "steps",
|
||||
"save_total_limit": 1,
|
||||
"seed": 100,
|
||||
"split_validation_test": true,
|
||||
"task_name": "mrpc",
|
||||
"test_dataset_config_name": [
|
||||
"en"
|
||||
],
|
||||
"test_dataset_name": "mrpc",
|
||||
"tokenizer_name": "t5-base",
|
||||
"warmup_steps": 500
|
||||
}
|
|
@ -0,0 +1,42 @@
|
|||
{
|
||||
"batch_size": 64,
|
||||
"dataset_config_name": [
|
||||
"en"
|
||||
],
|
||||
"delta_type": "bitfit",
|
||||
"do_eval": true,
|
||||
"do_test": true,
|
||||
"do_train": true,
|
||||
"eval_dataset_config_name": [
|
||||
"en"
|
||||
],
|
||||
"eval_dataset_name": "mrpc",
|
||||
"eval_steps": 200,
|
||||
"evaluation_strategy": "steps",
|
||||
"gradient_accumulation_steps": 2,
|
||||
"greater_is_better": true,
|
||||
"learning_rate": 0.00039059864620439417,
|
||||
"load_best_model_at_end": true,
|
||||
"max_source_length": 128,
|
||||
"max_steps": 5000,
|
||||
"metric_for_best_model": "average_metrics",
|
||||
"model_name_or_path": "t5-base",
|
||||
"output_dir": "outputs_search/bitfit.mrpc.t5-base/68027569",
|
||||
"overwrite_output_dir": true,
|
||||
"per_device_eval_batch_size": 64,
|
||||
"per_device_train_batch_size": 64,
|
||||
"predict_with_generate": true,
|
||||
"push_to_hub": false,
|
||||
"save_steps": 200,
|
||||
"save_strategy": "steps",
|
||||
"save_total_limit": 1,
|
||||
"seed": 100,
|
||||
"split_validation_test": true,
|
||||
"task_name": "mrpc",
|
||||
"test_dataset_config_name": [
|
||||
"en"
|
||||
],
|
||||
"test_dataset_name": "mrpc",
|
||||
"tokenizer_name": "t5-base",
|
||||
"warmup_steps": 500
|
||||
}
|
|
@ -0,0 +1,42 @@
|
|||
{
|
||||
"batch_size": 64,
|
||||
"dataset_config_name": [
|
||||
"en"
|
||||
],
|
||||
"delta_type": "bitfit",
|
||||
"do_eval": true,
|
||||
"do_test": true,
|
||||
"do_train": true,
|
||||
"eval_dataset_config_name": [
|
||||
"en"
|
||||
],
|
||||
"eval_dataset_name": "mrpc",
|
||||
"eval_steps": 200,
|
||||
"evaluation_strategy": "steps",
|
||||
"gradient_accumulation_steps": 2,
|
||||
"greater_is_better": true,
|
||||
"learning_rate": 0.0002642938525995798,
|
||||
"load_best_model_at_end": true,
|
||||
"max_source_length": 128,
|
||||
"max_steps": 5000,
|
||||
"metric_for_best_model": "average_metrics",
|
||||
"model_name_or_path": "t5-base",
|
||||
"output_dir": "outputs_search/bitfit.mrpc.t5-base/68314189",
|
||||
"overwrite_output_dir": true,
|
||||
"per_device_eval_batch_size": 64,
|
||||
"per_device_train_batch_size": 64,
|
||||
"predict_with_generate": true,
|
||||
"push_to_hub": false,
|
||||
"save_steps": 200,
|
||||
"save_strategy": "steps",
|
||||
"save_total_limit": 1,
|
||||
"seed": 100,
|
||||
"split_validation_test": true,
|
||||
"task_name": "mrpc",
|
||||
"test_dataset_config_name": [
|
||||
"en"
|
||||
],
|
||||
"test_dataset_name": "mrpc",
|
||||
"tokenizer_name": "t5-base",
|
||||
"warmup_steps": 500
|
||||
}
|
|
@ -0,0 +1,42 @@
|
|||
{
|
||||
"batch_size": 64,
|
||||
"dataset_config_name": [
|
||||
"en"
|
||||
],
|
||||
"delta_type": "bitfit",
|
||||
"do_eval": true,
|
||||
"do_test": true,
|
||||
"do_train": true,
|
||||
"eval_dataset_config_name": [
|
||||
"en"
|
||||
],
|
||||
"eval_dataset_name": "mrpc",
|
||||
"eval_steps": 200,
|
||||
"evaluation_strategy": "steps",
|
||||
"gradient_accumulation_steps": 1,
|
||||
"greater_is_better": true,
|
||||
"learning_rate": 0.037536374095955345,
|
||||
"load_best_model_at_end": true,
|
||||
"max_source_length": 128,
|
||||
"max_steps": 5000,
|
||||
"metric_for_best_model": "average_metrics",
|
||||
"model_name_or_path": "t5-base",
|
||||
"output_dir": "outputs_search/bitfit.mrpc.t5-base/71501650",
|
||||
"overwrite_output_dir": true,
|
||||
"per_device_eval_batch_size": 64,
|
||||
"per_device_train_batch_size": 64,
|
||||
"predict_with_generate": true,
|
||||
"push_to_hub": false,
|
||||
"save_steps": 200,
|
||||
"save_strategy": "steps",
|
||||
"save_total_limit": 1,
|
||||
"seed": 100,
|
||||
"split_validation_test": true,
|
||||
"task_name": "mrpc",
|
||||
"test_dataset_config_name": [
|
||||
"en"
|
||||
],
|
||||
"test_dataset_name": "mrpc",
|
||||
"tokenizer_name": "t5-base",
|
||||
"warmup_steps": 0
|
||||
}
|
|
@ -0,0 +1,42 @@
|
|||
{
|
||||
"batch_size": 32,
|
||||
"dataset_config_name": [
|
||||
"en"
|
||||
],
|
||||
"delta_type": "bitfit",
|
||||
"do_eval": true,
|
||||
"do_test": true,
|
||||
"do_train": true,
|
||||
"eval_dataset_config_name": [
|
||||
"en"
|
||||
],
|
||||
"eval_dataset_name": "mrpc",
|
||||
"eval_steps": 200,
|
||||
"evaluation_strategy": "steps",
|
||||
"gradient_accumulation_steps": 1,
|
||||
"greater_is_better": true,
|
||||
"learning_rate": 0.008866400032296955,
|
||||
"load_best_model_at_end": true,
|
||||
"max_source_length": 128,
|
||||
"max_steps": 5000,
|
||||
"metric_for_best_model": "average_metrics",
|
||||
"model_name_or_path": "t5-base",
|
||||
"output_dir": "outputs_search/bitfit.mrpc.t5-base/73962149",
|
||||
"overwrite_output_dir": true,
|
||||
"per_device_eval_batch_size": 32,
|
||||
"per_device_train_batch_size": 32,
|
||||
"predict_with_generate": true,
|
||||
"push_to_hub": false,
|
||||
"save_steps": 200,
|
||||
"save_strategy": "steps",
|
||||
"save_total_limit": 1,
|
||||
"seed": 100,
|
||||
"split_validation_test": true,
|
||||
"task_name": "mrpc",
|
||||
"test_dataset_config_name": [
|
||||
"en"
|
||||
],
|
||||
"test_dataset_name": "mrpc",
|
||||
"tokenizer_name": "t5-base",
|
||||
"warmup_steps": 0
|
||||
}
|
|
@ -0,0 +1,42 @@
|
|||
{
|
||||
"batch_size": 64,
|
||||
"dataset_config_name": [
|
||||
"en"
|
||||
],
|
||||
"delta_type": "bitfit",
|
||||
"do_eval": true,
|
||||
"do_test": true,
|
||||
"do_train": true,
|
||||
"eval_dataset_config_name": [
|
||||
"en"
|
||||
],
|
||||
"eval_dataset_name": "mrpc",
|
||||
"eval_steps": 200,
|
||||
"evaluation_strategy": "steps",
|
||||
"gradient_accumulation_steps": 1,
|
||||
"greater_is_better": true,
|
||||
"learning_rate": 0.01086484610816823,
|
||||
"load_best_model_at_end": true,
|
||||
"max_source_length": 128,
|
||||
"max_steps": 5000,
|
||||
"metric_for_best_model": "average_metrics",
|
||||
"model_name_or_path": "t5-base",
|
||||
"output_dir": "outputs_search/bitfit.mrpc.t5-base/83260414",
|
||||
"overwrite_output_dir": true,
|
||||
"per_device_eval_batch_size": 64,
|
||||
"per_device_train_batch_size": 64,
|
||||
"predict_with_generate": true,
|
||||
"push_to_hub": false,
|
||||
"save_steps": 200,
|
||||
"save_strategy": "steps",
|
||||
"save_total_limit": 1,
|
||||
"seed": 100,
|
||||
"split_validation_test": true,
|
||||
"task_name": "mrpc",
|
||||
"test_dataset_config_name": [
|
||||
"en"
|
||||
],
|
||||
"test_dataset_name": "mrpc",
|
||||
"tokenizer_name": "t5-base",
|
||||
"warmup_steps": 500
|
||||
}
|
|
@ -0,0 +1,42 @@
|
|||
{
|
||||
"batch_size": 64,
|
||||
"dataset_config_name": [
|
||||
"en"
|
||||
],
|
||||
"delta_type": "bitfit",
|
||||
"do_eval": true,
|
||||
"do_test": true,
|
||||
"do_train": true,
|
||||
"eval_dataset_config_name": [
|
||||
"en"
|
||||
],
|
||||
"eval_dataset_name": "mrpc",
|
||||
"eval_steps": 200,
|
||||
"evaluation_strategy": "steps",
|
||||
"gradient_accumulation_steps": 2,
|
||||
"greater_is_better": true,
|
||||
"learning_rate": 1.2611496517588744e-05,
|
||||
"load_best_model_at_end": true,
|
||||
"max_source_length": 128,
|
||||
"max_steps": 5000,
|
||||
"metric_for_best_model": "average_metrics",
|
||||
"model_name_or_path": "t5-base",
|
||||
"output_dir": "outputs_search/bitfit.mrpc.t5-base/83839551",
|
||||
"overwrite_output_dir": true,
|
||||
"per_device_eval_batch_size": 64,
|
||||
"per_device_train_batch_size": 64,
|
||||
"predict_with_generate": true,
|
||||
"push_to_hub": false,
|
||||
"save_steps": 200,
|
||||
"save_strategy": "steps",
|
||||
"save_total_limit": 1,
|
||||
"seed": 100,
|
||||
"split_validation_test": true,
|
||||
"task_name": "mrpc",
|
||||
"test_dataset_config_name": [
|
||||
"en"
|
||||
],
|
||||
"test_dataset_name": "mrpc",
|
||||
"tokenizer_name": "t5-base",
|
||||
"warmup_steps": 0
|
||||
}
|
|
@ -0,0 +1,42 @@
|
|||
{
|
||||
"batch_size": 32,
|
||||
"dataset_config_name": [
|
||||
"en"
|
||||
],
|
||||
"delta_type": "bitfit",
|
||||
"do_eval": true,
|
||||
"do_test": true,
|
||||
"do_train": true,
|
||||
"eval_dataset_config_name": [
|
||||
"en"
|
||||
],
|
||||
"eval_dataset_name": "mrpc",
|
||||
"eval_steps": 200,
|
||||
"evaluation_strategy": "steps",
|
||||
"gradient_accumulation_steps": 1,
|
||||
"greater_is_better": true,
|
||||
"learning_rate": 0.0010110776655071255,
|
||||
"load_best_model_at_end": true,
|
||||
"max_source_length": 128,
|
||||
"max_steps": 5000,
|
||||
"metric_for_best_model": "average_metrics",
|
||||
"model_name_or_path": "t5-base",
|
||||
"output_dir": "outputs_search/bitfit.mrpc.t5-base/85624941",
|
||||
"overwrite_output_dir": true,
|
||||
"per_device_eval_batch_size": 32,
|
||||
"per_device_train_batch_size": 32,
|
||||
"predict_with_generate": true,
|
||||
"push_to_hub": false,
|
||||
"save_steps": 200,
|
||||
"save_strategy": "steps",
|
||||
"save_total_limit": 1,
|
||||
"seed": 100,
|
||||
"split_validation_test": true,
|
||||
"task_name": "mrpc",
|
||||
"test_dataset_config_name": [
|
||||
"en"
|
||||
],
|
||||
"test_dataset_name": "mrpc",
|
||||
"tokenizer_name": "t5-base",
|
||||
"warmup_steps": 0
|
||||
}
|
|
@ -0,0 +1,42 @@
|
|||
{
|
||||
"batch_size": 32,
|
||||
"dataset_config_name": [
|
||||
"en"
|
||||
],
|
||||
"delta_type": "bitfit",
|
||||
"do_eval": true,
|
||||
"do_test": true,
|
||||
"do_train": true,
|
||||
"eval_dataset_config_name": [
|
||||
"en"
|
||||
],
|
||||
"eval_dataset_name": "mrpc",
|
||||
"eval_steps": 200,
|
||||
"evaluation_strategy": "steps",
|
||||
"gradient_accumulation_steps": 1,
|
||||
"greater_is_better": true,
|
||||
"learning_rate": 0.0005414844782319124,
|
||||
"load_best_model_at_end": true,
|
||||
"max_source_length": 128,
|
||||
"max_steps": 5000,
|
||||
"metric_for_best_model": "average_metrics",
|
||||
"model_name_or_path": "t5-base",
|
||||
"output_dir": "outputs_search/bitfit.mrpc.t5-base/86039549",
|
||||
"overwrite_output_dir": true,
|
||||
"per_device_eval_batch_size": 32,
|
||||
"per_device_train_batch_size": 32,
|
||||
"predict_with_generate": true,
|
||||
"push_to_hub": false,
|
||||
"save_steps": 200,
|
||||
"save_strategy": "steps",
|
||||
"save_total_limit": 1,
|
||||
"seed": 100,
|
||||
"split_validation_test": true,
|
||||
"task_name": "mrpc",
|
||||
"test_dataset_config_name": [
|
||||
"en"
|
||||
],
|
||||
"test_dataset_name": "mrpc",
|
||||
"tokenizer_name": "t5-base",
|
||||
"warmup_steps": 0
|
||||
}
|
|
@ -0,0 +1,42 @@
|
|||
{
|
||||
"batch_size": 64,
|
||||
"dataset_config_name": [
|
||||
"en"
|
||||
],
|
||||
"delta_type": "bitfit",
|
||||
"do_eval": true,
|
||||
"do_test": true,
|
||||
"do_train": true,
|
||||
"eval_dataset_config_name": [
|
||||
"en"
|
||||
],
|
||||
"eval_dataset_name": "mrpc",
|
||||
"eval_steps": 200,
|
||||
"evaluation_strategy": "steps",
|
||||
"gradient_accumulation_steps": 1,
|
||||
"greater_is_better": true,
|
||||
"learning_rate": 0.0027955533792956614,
|
||||
"load_best_model_at_end": true,
|
||||
"max_source_length": 128,
|
||||
"max_steps": 5000,
|
||||
"metric_for_best_model": "average_metrics",
|
||||
"model_name_or_path": "t5-base",
|
||||
"output_dir": "outputs_search/bitfit.mrpc.t5-base/89676181",
|
||||
"overwrite_output_dir": true,
|
||||
"per_device_eval_batch_size": 64,
|
||||
"per_device_train_batch_size": 64,
|
||||
"predict_with_generate": true,
|
||||
"push_to_hub": false,
|
||||
"save_steps": 200,
|
||||
"save_strategy": "steps",
|
||||
"save_total_limit": 1,
|
||||
"seed": 100,
|
||||
"split_validation_test": true,
|
||||
"task_name": "mrpc",
|
||||
"test_dataset_config_name": [
|
||||
"en"
|
||||
],
|
||||
"test_dataset_name": "mrpc",
|
||||
"tokenizer_name": "t5-base",
|
||||
"warmup_steps": 0
|
||||
}
|
|
@ -0,0 +1,42 @@
|
|||
{
|
||||
"batch_size": 64,
|
||||
"dataset_config_name": [
|
||||
"en"
|
||||
],
|
||||
"delta_type": "bitfit",
|
||||
"do_eval": true,
|
||||
"do_test": true,
|
||||
"do_train": true,
|
||||
"eval_dataset_config_name": [
|
||||
"en"
|
||||
],
|
||||
"eval_dataset_name": "mrpc",
|
||||
"eval_steps": 200,
|
||||
"evaluation_strategy": "steps",
|
||||
"gradient_accumulation_steps": 2,
|
||||
"greater_is_better": true,
|
||||
"learning_rate": 0.0012573200149141731,
|
||||
"load_best_model_at_end": true,
|
||||
"max_source_length": 128,
|
||||
"max_steps": 5000,
|
||||
"metric_for_best_model": "average_metrics",
|
||||
"model_name_or_path": "t5-base",
|
||||
"output_dir": "outputs_search/bitfit.mrpc.t5-base/91446644",
|
||||
"overwrite_output_dir": true,
|
||||
"per_device_eval_batch_size": 64,
|
||||
"per_device_train_batch_size": 64,
|
||||
"predict_with_generate": true,
|
||||
"push_to_hub": false,
|
||||
"save_steps": 200,
|
||||
"save_strategy": "steps",
|
||||
"save_total_limit": 1,
|
||||
"seed": 100,
|
||||
"split_validation_test": true,
|
||||
"task_name": "mrpc",
|
||||
"test_dataset_config_name": [
|
||||
"en"
|
||||
],
|
||||
"test_dataset_name": "mrpc",
|
||||
"tokenizer_name": "t5-base",
|
||||
"warmup_steps": 500
|
||||
}
|
|
@ -0,0 +1,42 @@
|
|||
{
|
||||
"batch_size": 64,
|
||||
"dataset_config_name": [
|
||||
"en"
|
||||
],
|
||||
"delta_type": "bitfit",
|
||||
"do_eval": true,
|
||||
"do_test": true,
|
||||
"do_train": true,
|
||||
"eval_dataset_config_name": [
|
||||
"en"
|
||||
],
|
||||
"eval_dataset_name": "mrpc",
|
||||
"eval_steps": 200,
|
||||
"evaluation_strategy": "steps",
|
||||
"gradient_accumulation_steps": 2,
|
||||
"greater_is_better": true,
|
||||
"learning_rate": 0.001152480984285531,
|
||||
"load_best_model_at_end": true,
|
||||
"max_source_length": 128,
|
||||
"max_steps": 5000,
|
||||
"metric_for_best_model": "average_metrics",
|
||||
"model_name_or_path": "t5-base",
|
||||
"output_dir": "outputs_search/bitfit.mrpc.t5-base/92427532",
|
||||
"overwrite_output_dir": true,
|
||||
"per_device_eval_batch_size": 64,
|
||||
"per_device_train_batch_size": 64,
|
||||
"predict_with_generate": true,
|
||||
"push_to_hub": false,
|
||||
"save_steps": 200,
|
||||
"save_strategy": "steps",
|
||||
"save_total_limit": 1,
|
||||
"seed": 100,
|
||||
"split_validation_test": true,
|
||||
"task_name": "mrpc",
|
||||
"test_dataset_config_name": [
|
||||
"en"
|
||||
],
|
||||
"test_dataset_name": "mrpc",
|
||||
"tokenizer_name": "t5-base",
|
||||
"warmup_steps": 0
|
||||
}
|
|
@ -0,0 +1,42 @@
|
|||
{
|
||||
"batch_size": 16,
|
||||
"dataset_config_name": [
|
||||
"en"
|
||||
],
|
||||
"delta_type": "bitfit",
|
||||
"do_eval": true,
|
||||
"do_test": true,
|
||||
"do_train": true,
|
||||
"eval_dataset_config_name": [
|
||||
"en"
|
||||
],
|
||||
"eval_dataset_name": "mrpc",
|
||||
"eval_steps": 200,
|
||||
"evaluation_strategy": "steps",
|
||||
"gradient_accumulation_steps": 1,
|
||||
"greater_is_better": true,
|
||||
"learning_rate": 0.002464124578330328,
|
||||
"load_best_model_at_end": true,
|
||||
"max_source_length": 128,
|
||||
"max_steps": 5000,
|
||||
"metric_for_best_model": "average_metrics",
|
||||
"model_name_or_path": "t5-base",
|
||||
"output_dir": "outputs_search/bitfit.mrpc.t5-base/93923515",
|
||||
"overwrite_output_dir": true,
|
||||
"per_device_eval_batch_size": 16,
|
||||
"per_device_train_batch_size": 16,
|
||||
"predict_with_generate": true,
|
||||
"push_to_hub": false,
|
||||
"save_steps": 200,
|
||||
"save_strategy": "steps",
|
||||
"save_total_limit": 1,
|
||||
"seed": 100,
|
||||
"split_validation_test": true,
|
||||
"task_name": "mrpc",
|
||||
"test_dataset_config_name": [
|
||||
"en"
|
||||
],
|
||||
"test_dataset_name": "mrpc",
|
||||
"tokenizer_name": "t5-base",
|
||||
"warmup_steps": 500
|
||||
}
|
|
@ -0,0 +1,42 @@
|
|||
{
|
||||
"batch_size": 16,
|
||||
"dataset_config_name": [
|
||||
"en"
|
||||
],
|
||||
"delta_type": "bitfit",
|
||||
"do_eval": true,
|
||||
"do_test": true,
|
||||
"do_train": true,
|
||||
"eval_dataset_config_name": [
|
||||
"en"
|
||||
],
|
||||
"eval_dataset_name": "mrpc",
|
||||
"eval_steps": 200,
|
||||
"evaluation_strategy": "steps",
|
||||
"gradient_accumulation_steps": 1,
|
||||
"greater_is_better": true,
|
||||
"learning_rate": 0.000127337205276883,
|
||||
"load_best_model_at_end": true,
|
||||
"max_source_length": 128,
|
||||
"max_steps": 5000,
|
||||
"metric_for_best_model": "average_metrics",
|
||||
"model_name_or_path": "t5-base",
|
||||
"output_dir": "outputs_search/bitfit.mrpc.t5-base/96799644",
|
||||
"overwrite_output_dir": true,
|
||||
"per_device_eval_batch_size": 16,
|
||||
"per_device_train_batch_size": 16,
|
||||
"predict_with_generate": true,
|
||||
"push_to_hub": false,
|
||||
"save_steps": 200,
|
||||
"save_strategy": "steps",
|
||||
"save_total_limit": 1,
|
||||
"seed": 100,
|
||||
"split_validation_test": true,
|
||||
"task_name": "mrpc",
|
||||
"test_dataset_config_name": [
|
||||
"en"
|
||||
],
|
||||
"test_dataset_name": "mrpc",
|
||||
"tokenizer_name": "t5-base",
|
||||
"warmup_steps": 500
|
||||
}
|
|
@ -0,0 +1,42 @@
|
|||
{
|
||||
"batch_size": 64,
|
||||
"dataset_config_name": [
|
||||
"en"
|
||||
],
|
||||
"delta_type": "bitfit",
|
||||
"do_eval": true,
|
||||
"do_test": true,
|
||||
"do_train": true,
|
||||
"eval_dataset_config_name": [
|
||||
"en"
|
||||
],
|
||||
"eval_dataset_name": "mrpc",
|
||||
"eval_steps": 200,
|
||||
"evaluation_strategy": "steps",
|
||||
"gradient_accumulation_steps": 2,
|
||||
"greater_is_better": true,
|
||||
"learning_rate": 0.017304287780519442,
|
||||
"load_best_model_at_end": true,
|
||||
"max_source_length": 128,
|
||||
"max_steps": 5000,
|
||||
"metric_for_best_model": "average_metrics",
|
||||
"model_name_or_path": "t5-base",
|
||||
"output_dir": "outputs_search/bitfit.mrpc.t5-base/97118516",
|
||||
"overwrite_output_dir": true,
|
||||
"per_device_eval_batch_size": 64,
|
||||
"per_device_train_batch_size": 64,
|
||||
"predict_with_generate": true,
|
||||
"push_to_hub": false,
|
||||
"save_steps": 200,
|
||||
"save_strategy": "steps",
|
||||
"save_total_limit": 1,
|
||||
"seed": 100,
|
||||
"split_validation_test": true,
|
||||
"task_name": "mrpc",
|
||||
"test_dataset_config_name": [
|
||||
"en"
|
||||
],
|
||||
"test_dataset_name": "mrpc",
|
||||
"tokenizer_name": "t5-base",
|
||||
"warmup_steps": 500
|
||||
}
|
|
@ -0,0 +1,42 @@
|
|||
{
|
||||
"batch_size": 64,
|
||||
"dataset_config_name": [
|
||||
"en"
|
||||
],
|
||||
"delta_type": "bitfit",
|
||||
"do_eval": true,
|
||||
"do_test": true,
|
||||
"do_train": true,
|
||||
"eval_dataset_config_name": [
|
||||
"en"
|
||||
],
|
||||
"eval_dataset_name": "mrpc",
|
||||
"eval_steps": 200,
|
||||
"evaluation_strategy": "steps",
|
||||
"gradient_accumulation_steps": 1,
|
||||
"greater_is_better": true,
|
||||
"learning_rate": 0.057233123182472576,
|
||||
"load_best_model_at_end": true,
|
||||
"max_source_length": 128,
|
||||
"max_steps": 5000,
|
||||
"metric_for_best_model": "average_metrics",
|
||||
"model_name_or_path": "t5-base",
|
||||
"output_dir": "outputs_search/bitfit.mrpc.t5-base/97177600",
|
||||
"overwrite_output_dir": true,
|
||||
"per_device_eval_batch_size": 64,
|
||||
"per_device_train_batch_size": 64,
|
||||
"predict_with_generate": true,
|
||||
"push_to_hub": false,
|
||||
"save_steps": 200,
|
||||
"save_strategy": "steps",
|
||||
"save_total_limit": 1,
|
||||
"seed": 100,
|
||||
"split_validation_test": true,
|
||||
"task_name": "mrpc",
|
||||
"test_dataset_config_name": [
|
||||
"en"
|
||||
],
|
||||
"test_dataset_name": "mrpc",
|
||||
"tokenizer_name": "t5-base",
|
||||
"warmup_steps": 500
|
||||
}
|
|
@ -0,0 +1,42 @@
|
|||
{
|
||||
"batch_size": 64,
|
||||
"dataset_config_name": [
|
||||
"en"
|
||||
],
|
||||
"delta_type": "bitfit",
|
||||
"do_eval": true,
|
||||
"do_test": true,
|
||||
"do_train": true,
|
||||
"eval_dataset_config_name": [
|
||||
"en"
|
||||
],
|
||||
"eval_dataset_name": "mrpc",
|
||||
"eval_steps": 200,
|
||||
"evaluation_strategy": "steps",
|
||||
"gradient_accumulation_steps": 1,
|
||||
"greater_is_better": true,
|
||||
"learning_rate": 0.041620230849224296,
|
||||
"load_best_model_at_end": true,
|
||||
"max_source_length": 128,
|
||||
"max_steps": 5000,
|
||||
"metric_for_best_model": "average_metrics",
|
||||
"model_name_or_path": "t5-base",
|
||||
"output_dir": "outputs_search/bitfit.mrpc.t5-base/97660529",
|
||||
"overwrite_output_dir": true,
|
||||
"per_device_eval_batch_size": 64,
|
||||
"per_device_train_batch_size": 64,
|
||||
"predict_with_generate": true,
|
||||
"push_to_hub": false,
|
||||
"save_steps": 200,
|
||||
"save_strategy": "steps",
|
||||
"save_total_limit": 1,
|
||||
"seed": 100,
|
||||
"split_validation_test": true,
|
||||
"task_name": "mrpc",
|
||||
"test_dataset_config_name": [
|
||||
"en"
|
||||
],
|
||||
"test_dataset_name": "mrpc",
|
||||
"tokenizer_name": "t5-base",
|
||||
"warmup_steps": 500
|
||||
}
|
|
@ -0,0 +1,42 @@
|
|||
{
|
||||
"batch_size": 64,
|
||||
"dataset_config_name": [
|
||||
"en"
|
||||
],
|
||||
"delta_type": "bitfit",
|
||||
"do_eval": true,
|
||||
"do_test": true,
|
||||
"do_train": true,
|
||||
"eval_dataset_config_name": [
|
||||
"en"
|
||||
],
|
||||
"eval_dataset_name": "mrpc",
|
||||
"eval_steps": 200,
|
||||
"evaluation_strategy": "steps",
|
||||
"gradient_accumulation_steps": 2,
|
||||
"greater_is_better": true,
|
||||
"learning_rate": 0.0005420479832650441,
|
||||
"load_best_model_at_end": true,
|
||||
"max_source_length": 128,
|
||||
"max_steps": 5000,
|
||||
"metric_for_best_model": "average_metrics",
|
||||
"model_name_or_path": "t5-base",
|
||||
"output_dir": "outputs_search/bitfit.mrpc.t5-base/98459622",
|
||||
"overwrite_output_dir": true,
|
||||
"per_device_eval_batch_size": 64,
|
||||
"per_device_train_batch_size": 64,
|
||||
"predict_with_generate": true,
|
||||
"push_to_hub": false,
|
||||
"save_steps": 200,
|
||||
"save_strategy": "steps",
|
||||
"save_total_limit": 1,
|
||||
"seed": 100,
|
||||
"split_validation_test": true,
|
||||
"task_name": "mrpc",
|
||||
"test_dataset_config_name": [
|
||||
"en"
|
||||
],
|
||||
"test_dataset_name": "mrpc",
|
||||
"tokenizer_name": "t5-base",
|
||||
"warmup_steps": 500
|
||||
}
|
|
@ -0,0 +1,42 @@
|
|||
{
|
||||
"batch_size": 32,
|
||||
"dataset_config_name": [
|
||||
"en"
|
||||
],
|
||||
"delta_type": "bitfit",
|
||||
"do_eval": true,
|
||||
"do_test": true,
|
||||
"do_train": true,
|
||||
"eval_dataset_config_name": [
|
||||
"en"
|
||||
],
|
||||
"eval_dataset_name": "mrpc",
|
||||
"eval_steps": 200,
|
||||
"evaluation_strategy": "steps",
|
||||
"gradient_accumulation_steps": 1,
|
||||
"greater_is_better": true,
|
||||
"learning_rate": 0.0026938134462562973,
|
||||
"load_best_model_at_end": true,
|
||||
"max_source_length": 128,
|
||||
"max_steps": 5000,
|
||||
"metric_for_best_model": "average_metrics",
|
||||
"model_name_or_path": "t5-base",
|
||||
"output_dir": "outputs_search/bitfit.mrpc.t5-base/99566760",
|
||||
"overwrite_output_dir": true,
|
||||
"per_device_eval_batch_size": 32,
|
||||
"per_device_train_batch_size": 32,
|
||||
"predict_with_generate": true,
|
||||
"push_to_hub": false,
|
||||
"save_steps": 200,
|
||||
"save_strategy": "steps",
|
||||
"save_total_limit": 1,
|
||||
"seed": 100,
|
||||
"split_validation_test": true,
|
||||
"task_name": "mrpc",
|
||||
"test_dataset_config_name": [
|
||||
"en"
|
||||
],
|
||||
"test_dataset_name": "mrpc",
|
||||
"tokenizer_name": "t5-base",
|
||||
"warmup_steps": 0
|
||||
}
|
|
@ -0,0 +1,42 @@
|
|||
{
|
||||
"batch_size": 64,
|
||||
"dataset_config_name": [
|
||||
"en"
|
||||
],
|
||||
"delta_type": "bitfit",
|
||||
"do_eval": true,
|
||||
"do_test": true,
|
||||
"do_train": true,
|
||||
"eval_dataset_config_name": [
|
||||
"en"
|
||||
],
|
||||
"eval_dataset_name": "mrpc",
|
||||
"eval_steps": 200,
|
||||
"evaluation_strategy": "steps",
|
||||
"gradient_accumulation_steps": 2,
|
||||
"greater_is_better": true,
|
||||
"learning_rate": 0.00702408842393251,
|
||||
"load_best_model_at_end": true,
|
||||
"max_source_length": 128,
|
||||
"max_steps": 5000,
|
||||
"metric_for_best_model": "average_metrics",
|
||||
"model_name_or_path": "t5-base",
|
||||
"output_dir": "outputs_search/bitfit.mrpc.t5-base/99826259",
|
||||
"overwrite_output_dir": true,
|
||||
"per_device_eval_batch_size": 64,
|
||||
"per_device_train_batch_size": 64,
|
||||
"predict_with_generate": true,
|
||||
"push_to_hub": false,
|
||||
"save_steps": 200,
|
||||
"save_strategy": "steps",
|
||||
"save_total_limit": 1,
|
||||
"seed": 100,
|
||||
"split_validation_test": true,
|
||||
"task_name": "mrpc",
|
||||
"test_dataset_config_name": [
|
||||
"en"
|
||||
],
|
||||
"test_dataset_name": "mrpc",
|
||||
"tokenizer_name": "t5-base",
|
||||
"warmup_steps": 500
|
||||
}
|
|
@ -0,0 +1,42 @@
|
|||
{
|
||||
"batch_size": 64,
|
||||
"dataset_config_name": [
|
||||
"en"
|
||||
],
|
||||
"delta_type": "bitfit",
|
||||
"do_eval": true,
|
||||
"do_test": true,
|
||||
"do_train": true,
|
||||
"eval_dataset_config_name": [
|
||||
"en"
|
||||
],
|
||||
"eval_dataset_name": "mrpc",
|
||||
"eval_steps": 200,
|
||||
"evaluation_strategy": "steps",
|
||||
"gradient_accumulation_steps": 2,
|
||||
"greater_is_better": true,
|
||||
"learning_rate": 0.00702408842393251,
|
||||
"load_best_model_at_end": true,
|
||||
"max_source_length": 128,
|
||||
"max_steps": 5000,
|
||||
"metric_for_best_model": "average_metrics",
|
||||
"model_name_or_path": "t5-base",
|
||||
"output_dir": "outputs_search/bitfit.mrpc.t5-base/99826259",
|
||||
"overwrite_output_dir": true,
|
||||
"per_device_eval_batch_size": 64,
|
||||
"per_device_train_batch_size": 64,
|
||||
"predict_with_generate": true,
|
||||
"push_to_hub": false,
|
||||
"save_steps": 200,
|
||||
"save_strategy": "steps",
|
||||
"save_total_limit": 1,
|
||||
"seed": 100,
|
||||
"split_validation_test": true,
|
||||
"task_name": "mrpc",
|
||||
"test_dataset_config_name": [
|
||||
"en"
|
||||
],
|
||||
"test_dataset_name": "mrpc",
|
||||
"tokenizer_name": "t5-base",
|
||||
"warmup_steps": 500
|
||||
}
|
Binary file not shown.
After Width: | Height: | Size: 103 KiB |
Binary file not shown.
After Width: | Height: | Size: 186 KiB |
Binary file not shown.
After Width: | Height: | Size: 34 KiB |
Binary file not shown.
After Width: | Height: | Size: 56 KiB |
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
|
@ -0,0 +1,42 @@
|
|||
{
|
||||
"batch_size": 64,
|
||||
"dataset_config_name": [
|
||||
"en"
|
||||
],
|
||||
"delta_type": "bitfit",
|
||||
"do_eval": true,
|
||||
"do_test": true,
|
||||
"do_train": true,
|
||||
"eval_dataset_config_name": [
|
||||
"en"
|
||||
],
|
||||
"eval_dataset_name": "rte",
|
||||
"eval_steps": 200,
|
||||
"evaluation_strategy": "steps",
|
||||
"gradient_accumulation_steps": 2,
|
||||
"greater_is_better": true,
|
||||
"learning_rate": 1.1032607780913182e-05,
|
||||
"load_best_model_at_end": true,
|
||||
"max_source_length": 128,
|
||||
"max_steps": 5000,
|
||||
"metric_for_best_model": "average_metrics",
|
||||
"model_name_or_path": "t5-base",
|
||||
"output_dir": "outputs_search/bitfit.rte.t5-base/1123702",
|
||||
"overwrite_output_dir": true,
|
||||
"per_device_eval_batch_size": 64,
|
||||
"per_device_train_batch_size": 64,
|
||||
"predict_with_generate": true,
|
||||
"push_to_hub": false,
|
||||
"save_steps": 200,
|
||||
"save_strategy": "steps",
|
||||
"save_total_limit": 1,
|
||||
"seed": 100,
|
||||
"split_validation_test": true,
|
||||
"task_name": "rte",
|
||||
"test_dataset_config_name": [
|
||||
"en"
|
||||
],
|
||||
"test_dataset_name": "rte",
|
||||
"tokenizer_name": "t5-base",
|
||||
"warmup_steps": 500
|
||||
}
|
|
@ -0,0 +1,42 @@
|
|||
{
|
||||
"batch_size": 64,
|
||||
"dataset_config_name": [
|
||||
"en"
|
||||
],
|
||||
"delta_type": "bitfit",
|
||||
"do_eval": true,
|
||||
"do_test": true,
|
||||
"do_train": true,
|
||||
"eval_dataset_config_name": [
|
||||
"en"
|
||||
],
|
||||
"eval_dataset_name": "rte",
|
||||
"eval_steps": 200,
|
||||
"evaluation_strategy": "steps",
|
||||
"gradient_accumulation_steps": 1,
|
||||
"greater_is_better": true,
|
||||
"learning_rate": 9.869021064463024e-05,
|
||||
"load_best_model_at_end": true,
|
||||
"max_source_length": 128,
|
||||
"max_steps": 5000,
|
||||
"metric_for_best_model": "average_metrics",
|
||||
"model_name_or_path": "t5-base",
|
||||
"output_dir": "outputs_search/bitfit.rte.t5-base/12173417",
|
||||
"overwrite_output_dir": true,
|
||||
"per_device_eval_batch_size": 64,
|
||||
"per_device_train_batch_size": 64,
|
||||
"predict_with_generate": true,
|
||||
"push_to_hub": false,
|
||||
"save_steps": 200,
|
||||
"save_strategy": "steps",
|
||||
"save_total_limit": 1,
|
||||
"seed": 100,
|
||||
"split_validation_test": true,
|
||||
"task_name": "rte",
|
||||
"test_dataset_config_name": [
|
||||
"en"
|
||||
],
|
||||
"test_dataset_name": "rte",
|
||||
"tokenizer_name": "t5-base",
|
||||
"warmup_steps": 500
|
||||
}
|
|
@ -0,0 +1,42 @@
|
|||
{
|
||||
"batch_size": 64,
|
||||
"dataset_config_name": [
|
||||
"en"
|
||||
],
|
||||
"delta_type": "bitfit",
|
||||
"do_eval": true,
|
||||
"do_test": true,
|
||||
"do_train": true,
|
||||
"eval_dataset_config_name": [
|
||||
"en"
|
||||
],
|
||||
"eval_dataset_name": "rte",
|
||||
"eval_steps": 200,
|
||||
"evaluation_strategy": "steps",
|
||||
"gradient_accumulation_steps": 1,
|
||||
"greater_is_better": true,
|
||||
"learning_rate": 0.000913136097576348,
|
||||
"load_best_model_at_end": true,
|
||||
"max_source_length": 128,
|
||||
"max_steps": 5000,
|
||||
"metric_for_best_model": "average_metrics",
|
||||
"model_name_or_path": "t5-base",
|
||||
"output_dir": "outputs_search/bitfit.rte.t5-base/14983360",
|
||||
"overwrite_output_dir": true,
|
||||
"per_device_eval_batch_size": 64,
|
||||
"per_device_train_batch_size": 64,
|
||||
"predict_with_generate": true,
|
||||
"push_to_hub": false,
|
||||
"save_steps": 200,
|
||||
"save_strategy": "steps",
|
||||
"save_total_limit": 1,
|
||||
"seed": 100,
|
||||
"split_validation_test": true,
|
||||
"task_name": "rte",
|
||||
"test_dataset_config_name": [
|
||||
"en"
|
||||
],
|
||||
"test_dataset_name": "rte",
|
||||
"tokenizer_name": "t5-base",
|
||||
"warmup_steps": 500
|
||||
}
|
|
@ -0,0 +1,42 @@
|
|||
{
|
||||
"batch_size": 64,
|
||||
"dataset_config_name": [
|
||||
"en"
|
||||
],
|
||||
"delta_type": "bitfit",
|
||||
"do_eval": true,
|
||||
"do_test": true,
|
||||
"do_train": true,
|
||||
"eval_dataset_config_name": [
|
||||
"en"
|
||||
],
|
||||
"eval_dataset_name": "rte",
|
||||
"eval_steps": 200,
|
||||
"evaluation_strategy": "steps",
|
||||
"gradient_accumulation_steps": 2,
|
||||
"greater_is_better": true,
|
||||
"learning_rate": 1.1605972169428286e-05,
|
||||
"load_best_model_at_end": true,
|
||||
"max_source_length": 128,
|
||||
"max_steps": 5000,
|
||||
"metric_for_best_model": "average_metrics",
|
||||
"model_name_or_path": "t5-base",
|
||||
"output_dir": "outputs_search/bitfit.rte.t5-base/17148549",
|
||||
"overwrite_output_dir": true,
|
||||
"per_device_eval_batch_size": 64,
|
||||
"per_device_train_batch_size": 64,
|
||||
"predict_with_generate": true,
|
||||
"push_to_hub": false,
|
||||
"save_steps": 200,
|
||||
"save_strategy": "steps",
|
||||
"save_total_limit": 1,
|
||||
"seed": 100,
|
||||
"split_validation_test": true,
|
||||
"task_name": "rte",
|
||||
"test_dataset_config_name": [
|
||||
"en"
|
||||
],
|
||||
"test_dataset_name": "rte",
|
||||
"tokenizer_name": "t5-base",
|
||||
"warmup_steps": 500
|
||||
}
|
|
@ -0,0 +1,42 @@
|
|||
{
|
||||
"batch_size": 32,
|
||||
"dataset_config_name": [
|
||||
"en"
|
||||
],
|
||||
"delta_type": "bitfit",
|
||||
"do_eval": true,
|
||||
"do_test": true,
|
||||
"do_train": true,
|
||||
"eval_dataset_config_name": [
|
||||
"en"
|
||||
],
|
||||
"eval_dataset_name": "rte",
|
||||
"eval_steps": 200,
|
||||
"evaluation_strategy": "steps",
|
||||
"gradient_accumulation_steps": 1,
|
||||
"greater_is_better": true,
|
||||
"learning_rate": 2.8707127478048054e-05,
|
||||
"load_best_model_at_end": true,
|
||||
"max_source_length": 128,
|
||||
"max_steps": 5000,
|
||||
"metric_for_best_model": "average_metrics",
|
||||
"model_name_or_path": "t5-base",
|
||||
"output_dir": "outputs_search/bitfit.rte.t5-base/18069491",
|
||||
"overwrite_output_dir": true,
|
||||
"per_device_eval_batch_size": 32,
|
||||
"per_device_train_batch_size": 32,
|
||||
"predict_with_generate": true,
|
||||
"push_to_hub": false,
|
||||
"save_steps": 200,
|
||||
"save_strategy": "steps",
|
||||
"save_total_limit": 1,
|
||||
"seed": 100,
|
||||
"split_validation_test": true,
|
||||
"task_name": "rte",
|
||||
"test_dataset_config_name": [
|
||||
"en"
|
||||
],
|
||||
"test_dataset_name": "rte",
|
||||
"tokenizer_name": "t5-base",
|
||||
"warmup_steps": 0
|
||||
}
|
File diff suppressed because one or more lines are too long
|
@ -0,0 +1,42 @@
|
|||
{
|
||||
"batch_size": 32,
|
||||
"dataset_config_name": [
|
||||
"en"
|
||||
],
|
||||
"delta_type": "bitfit",
|
||||
"do_eval": true,
|
||||
"do_test": true,
|
||||
"do_train": true,
|
||||
"eval_dataset_config_name": [
|
||||
"en"
|
||||
],
|
||||
"eval_dataset_name": "rte",
|
||||
"eval_steps": 200,
|
||||
"evaluation_strategy": "steps",
|
||||
"gradient_accumulation_steps": 1,
|
||||
"greater_is_better": true,
|
||||
"learning_rate": 0.000194974976225138,
|
||||
"load_best_model_at_end": true,
|
||||
"max_source_length": 128,
|
||||
"max_steps": 5000,
|
||||
"metric_for_best_model": "average_metrics",
|
||||
"model_name_or_path": "t5-base",
|
||||
"output_dir": "outputs_search/bitfit.rte.t5-base/20719975",
|
||||
"overwrite_output_dir": true,
|
||||
"per_device_eval_batch_size": 32,
|
||||
"per_device_train_batch_size": 32,
|
||||
"predict_with_generate": true,
|
||||
"push_to_hub": false,
|
||||
"save_steps": 200,
|
||||
"save_strategy": "steps",
|
||||
"save_total_limit": 1,
|
||||
"seed": 100,
|
||||
"split_validation_test": true,
|
||||
"task_name": "rte",
|
||||
"test_dataset_config_name": [
|
||||
"en"
|
||||
],
|
||||
"test_dataset_name": "rte",
|
||||
"tokenizer_name": "t5-base",
|
||||
"warmup_steps": 0
|
||||
}
|
|
@ -0,0 +1,42 @@
|
|||
{
|
||||
"batch_size": 32,
|
||||
"dataset_config_name": [
|
||||
"en"
|
||||
],
|
||||
"delta_type": "bitfit",
|
||||
"do_eval": true,
|
||||
"do_test": true,
|
||||
"do_train": true,
|
||||
"eval_dataset_config_name": [
|
||||
"en"
|
||||
],
|
||||
"eval_dataset_name": "rte",
|
||||
"eval_steps": 200,
|
||||
"evaluation_strategy": "steps",
|
||||
"gradient_accumulation_steps": 1,
|
||||
"greater_is_better": true,
|
||||
"learning_rate": 0.03781286205477464,
|
||||
"load_best_model_at_end": true,
|
||||
"max_source_length": 128,
|
||||
"max_steps": 5000,
|
||||
"metric_for_best_model": "average_metrics",
|
||||
"model_name_or_path": "t5-base",
|
||||
"output_dir": "outputs_search/bitfit.rte.t5-base/26158876",
|
||||
"overwrite_output_dir": true,
|
||||
"per_device_eval_batch_size": 32,
|
||||
"per_device_train_batch_size": 32,
|
||||
"predict_with_generate": true,
|
||||
"push_to_hub": false,
|
||||
"save_steps": 200,
|
||||
"save_strategy": "steps",
|
||||
"save_total_limit": 1,
|
||||
"seed": 100,
|
||||
"split_validation_test": true,
|
||||
"task_name": "rte",
|
||||
"test_dataset_config_name": [
|
||||
"en"
|
||||
],
|
||||
"test_dataset_name": "rte",
|
||||
"tokenizer_name": "t5-base",
|
||||
"warmup_steps": 0
|
||||
}
|
|
@ -0,0 +1,42 @@
|
|||
{
|
||||
"batch_size": 32,
|
||||
"dataset_config_name": [
|
||||
"en"
|
||||
],
|
||||
"delta_type": "bitfit",
|
||||
"do_eval": true,
|
||||
"do_test": true,
|
||||
"do_train": true,
|
||||
"eval_dataset_config_name": [
|
||||
"en"
|
||||
],
|
||||
"eval_dataset_name": "rte",
|
||||
"eval_steps": 200,
|
||||
"evaluation_strategy": "steps",
|
||||
"gradient_accumulation_steps": 1,
|
||||
"greater_is_better": true,
|
||||
"learning_rate": 0.0618810008699179,
|
||||
"load_best_model_at_end": true,
|
||||
"max_source_length": 128,
|
||||
"max_steps": 5000,
|
||||
"metric_for_best_model": "average_metrics",
|
||||
"model_name_or_path": "t5-base",
|
||||
"output_dir": "outputs_search/bitfit.rte.t5-base/28522034",
|
||||
"overwrite_output_dir": true,
|
||||
"per_device_eval_batch_size": 32,
|
||||
"per_device_train_batch_size": 32,
|
||||
"predict_with_generate": true,
|
||||
"push_to_hub": false,
|
||||
"save_steps": 200,
|
||||
"save_strategy": "steps",
|
||||
"save_total_limit": 1,
|
||||
"seed": 100,
|
||||
"split_validation_test": true,
|
||||
"task_name": "rte",
|
||||
"test_dataset_config_name": [
|
||||
"en"
|
||||
],
|
||||
"test_dataset_name": "rte",
|
||||
"tokenizer_name": "t5-base",
|
||||
"warmup_steps": 500
|
||||
}
|
|
@ -0,0 +1,42 @@
|
|||
{
|
||||
"batch_size": 64,
|
||||
"dataset_config_name": [
|
||||
"en"
|
||||
],
|
||||
"delta_type": "bitfit",
|
||||
"do_eval": true,
|
||||
"do_test": true,
|
||||
"do_train": true,
|
||||
"eval_dataset_config_name": [
|
||||
"en"
|
||||
],
|
||||
"eval_dataset_name": "rte",
|
||||
"eval_steps": 200,
|
||||
"evaluation_strategy": "steps",
|
||||
"gradient_accumulation_steps": 2,
|
||||
"greater_is_better": true,
|
||||
"learning_rate": 6.262592496186088e-05,
|
||||
"load_best_model_at_end": true,
|
||||
"max_source_length": 128,
|
||||
"max_steps": 5000,
|
||||
"metric_for_best_model": "average_metrics",
|
||||
"model_name_or_path": "t5-base",
|
||||
"output_dir": "outputs_search/bitfit.rte.t5-base/29099149",
|
||||
"overwrite_output_dir": true,
|
||||
"per_device_eval_batch_size": 64,
|
||||
"per_device_train_batch_size": 64,
|
||||
"predict_with_generate": true,
|
||||
"push_to_hub": false,
|
||||
"save_steps": 200,
|
||||
"save_strategy": "steps",
|
||||
"save_total_limit": 1,
|
||||
"seed": 100,
|
||||
"split_validation_test": true,
|
||||
"task_name": "rte",
|
||||
"test_dataset_config_name": [
|
||||
"en"
|
||||
],
|
||||
"test_dataset_name": "rte",
|
||||
"tokenizer_name": "t5-base",
|
||||
"warmup_steps": 500
|
||||
}
|
File diff suppressed because one or more lines are too long
|
@ -0,0 +1,42 @@
|
|||
{
|
||||
"batch_size": 64,
|
||||
"dataset_config_name": [
|
||||
"en"
|
||||
],
|
||||
"delta_type": "bitfit",
|
||||
"do_eval": true,
|
||||
"do_test": true,
|
||||
"do_train": true,
|
||||
"eval_dataset_config_name": [
|
||||
"en"
|
||||
],
|
||||
"eval_dataset_name": "rte",
|
||||
"eval_steps": 200,
|
||||
"evaluation_strategy": "steps",
|
||||
"gradient_accumulation_steps": 1,
|
||||
"greater_is_better": true,
|
||||
"learning_rate": 4.8538530604501934e-05,
|
||||
"load_best_model_at_end": true,
|
||||
"max_source_length": 128,
|
||||
"max_steps": 5000,
|
||||
"metric_for_best_model": "average_metrics",
|
||||
"model_name_or_path": "t5-base",
|
||||
"output_dir": "outputs_search/bitfit.rte.t5-base/30778533",
|
||||
"overwrite_output_dir": true,
|
||||
"per_device_eval_batch_size": 64,
|
||||
"per_device_train_batch_size": 64,
|
||||
"predict_with_generate": true,
|
||||
"push_to_hub": false,
|
||||
"save_steps": 200,
|
||||
"save_strategy": "steps",
|
||||
"save_total_limit": 1,
|
||||
"seed": 100,
|
||||
"split_validation_test": true,
|
||||
"task_name": "rte",
|
||||
"test_dataset_config_name": [
|
||||
"en"
|
||||
],
|
||||
"test_dataset_name": "rte",
|
||||
"tokenizer_name": "t5-base",
|
||||
"warmup_steps": 0
|
||||
}
|
|
@ -0,0 +1,42 @@
|
|||
{
|
||||
"batch_size": 16,
|
||||
"dataset_config_name": [
|
||||
"en"
|
||||
],
|
||||
"delta_type": "bitfit",
|
||||
"do_eval": true,
|
||||
"do_test": true,
|
||||
"do_train": true,
|
||||
"eval_dataset_config_name": [
|
||||
"en"
|
||||
],
|
||||
"eval_dataset_name": "rte",
|
||||
"eval_steps": 200,
|
||||
"evaluation_strategy": "steps",
|
||||
"gradient_accumulation_steps": 1,
|
||||
"greater_is_better": true,
|
||||
"learning_rate": 0.0009445961555576889,
|
||||
"load_best_model_at_end": true,
|
||||
"max_source_length": 128,
|
||||
"max_steps": 5000,
|
||||
"metric_for_best_model": "average_metrics",
|
||||
"model_name_or_path": "t5-base",
|
||||
"output_dir": "outputs_search/bitfit.rte.t5-base/33442523",
|
||||
"overwrite_output_dir": true,
|
||||
"per_device_eval_batch_size": 16,
|
||||
"per_device_train_batch_size": 16,
|
||||
"predict_with_generate": true,
|
||||
"push_to_hub": false,
|
||||
"save_steps": 200,
|
||||
"save_strategy": "steps",
|
||||
"save_total_limit": 1,
|
||||
"seed": 100,
|
||||
"split_validation_test": true,
|
||||
"task_name": "rte",
|
||||
"test_dataset_config_name": [
|
||||
"en"
|
||||
],
|
||||
"test_dataset_name": "rte",
|
||||
"tokenizer_name": "t5-base",
|
||||
"warmup_steps": 500
|
||||
}
|
|
@ -0,0 +1,42 @@
|
|||
{
|
||||
"batch_size": 16,
|
||||
"dataset_config_name": [
|
||||
"en"
|
||||
],
|
||||
"delta_type": "bitfit",
|
||||
"do_eval": true,
|
||||
"do_test": true,
|
||||
"do_train": true,
|
||||
"eval_dataset_config_name": [
|
||||
"en"
|
||||
],
|
||||
"eval_dataset_name": "rte",
|
||||
"eval_steps": 200,
|
||||
"evaluation_strategy": "steps",
|
||||
"gradient_accumulation_steps": 1,
|
||||
"greater_is_better": true,
|
||||
"learning_rate": 0.020109951371648067,
|
||||
"load_best_model_at_end": true,
|
||||
"max_source_length": 128,
|
||||
"max_steps": 5000,
|
||||
"metric_for_best_model": "average_metrics",
|
||||
"model_name_or_path": "t5-base",
|
||||
"output_dir": "outputs_search/bitfit.rte.t5-base/35699804",
|
||||
"overwrite_output_dir": true,
|
||||
"per_device_eval_batch_size": 16,
|
||||
"per_device_train_batch_size": 16,
|
||||
"predict_with_generate": true,
|
||||
"push_to_hub": false,
|
||||
"save_steps": 200,
|
||||
"save_strategy": "steps",
|
||||
"save_total_limit": 1,
|
||||
"seed": 100,
|
||||
"split_validation_test": true,
|
||||
"task_name": "rte",
|
||||
"test_dataset_config_name": [
|
||||
"en"
|
||||
],
|
||||
"test_dataset_name": "rte",
|
||||
"tokenizer_name": "t5-base",
|
||||
"warmup_steps": 500
|
||||
}
|
|
@ -0,0 +1,42 @@
|
|||
{
|
||||
"batch_size": 64,
|
||||
"dataset_config_name": [
|
||||
"en"
|
||||
],
|
||||
"delta_type": "bitfit",
|
||||
"do_eval": true,
|
||||
"do_test": true,
|
||||
"do_train": true,
|
||||
"eval_dataset_config_name": [
|
||||
"en"
|
||||
],
|
||||
"eval_dataset_name": "rte",
|
||||
"eval_steps": 200,
|
||||
"evaluation_strategy": "steps",
|
||||
"gradient_accumulation_steps": 2,
|
||||
"greater_is_better": true,
|
||||
"learning_rate": 6.0263760479697114e-05,
|
||||
"load_best_model_at_end": true,
|
||||
"max_source_length": 128,
|
||||
"max_steps": 5000,
|
||||
"metric_for_best_model": "average_metrics",
|
||||
"model_name_or_path": "t5-base",
|
||||
"output_dir": "outputs_search/bitfit.rte.t5-base/41924547",
|
||||
"overwrite_output_dir": true,
|
||||
"per_device_eval_batch_size": 64,
|
||||
"per_device_train_batch_size": 64,
|
||||
"predict_with_generate": true,
|
||||
"push_to_hub": false,
|
||||
"save_steps": 200,
|
||||
"save_strategy": "steps",
|
||||
"save_total_limit": 1,
|
||||
"seed": 100,
|
||||
"split_validation_test": true,
|
||||
"task_name": "rte",
|
||||
"test_dataset_config_name": [
|
||||
"en"
|
||||
],
|
||||
"test_dataset_name": "rte",
|
||||
"tokenizer_name": "t5-base",
|
||||
"warmup_steps": 500
|
||||
}
|
|
@ -0,0 +1,42 @@
|
|||
{
|
||||
"batch_size": 64,
|
||||
"dataset_config_name": [
|
||||
"en"
|
||||
],
|
||||
"delta_type": "bitfit",
|
||||
"do_eval": true,
|
||||
"do_test": true,
|
||||
"do_train": true,
|
||||
"eval_dataset_config_name": [
|
||||
"en"
|
||||
],
|
||||
"eval_dataset_name": "rte",
|
||||
"eval_steps": 200,
|
||||
"evaluation_strategy": "steps",
|
||||
"gradient_accumulation_steps": 2,
|
||||
"greater_is_better": true,
|
||||
"learning_rate": 7.833953000267327e-05,
|
||||
"load_best_model_at_end": true,
|
||||
"max_source_length": 128,
|
||||
"max_steps": 5000,
|
||||
"metric_for_best_model": "average_metrics",
|
||||
"model_name_or_path": "t5-base",
|
||||
"output_dir": "outputs_search/bitfit.rte.t5-base/45992418",
|
||||
"overwrite_output_dir": true,
|
||||
"per_device_eval_batch_size": 64,
|
||||
"per_device_train_batch_size": 64,
|
||||
"predict_with_generate": true,
|
||||
"push_to_hub": false,
|
||||
"save_steps": 200,
|
||||
"save_strategy": "steps",
|
||||
"save_total_limit": 1,
|
||||
"seed": 100,
|
||||
"split_validation_test": true,
|
||||
"task_name": "rte",
|
||||
"test_dataset_config_name": [
|
||||
"en"
|
||||
],
|
||||
"test_dataset_name": "rte",
|
||||
"tokenizer_name": "t5-base",
|
||||
"warmup_steps": 0
|
||||
}
|
|
@ -0,0 +1,42 @@
|
|||
{
|
||||
"batch_size": 64,
|
||||
"dataset_config_name": [
|
||||
"en"
|
||||
],
|
||||
"delta_type": "bitfit",
|
||||
"do_eval": true,
|
||||
"do_test": true,
|
||||
"do_train": true,
|
||||
"eval_dataset_config_name": [
|
||||
"en"
|
||||
],
|
||||
"eval_dataset_name": "rte",
|
||||
"eval_steps": 200,
|
||||
"evaluation_strategy": "steps",
|
||||
"gradient_accumulation_steps": 1,
|
||||
"greater_is_better": true,
|
||||
"learning_rate": 0.0005632466045355159,
|
||||
"load_best_model_at_end": true,
|
||||
"max_source_length": 128,
|
||||
"max_steps": 5000,
|
||||
"metric_for_best_model": "average_metrics",
|
||||
"model_name_or_path": "t5-base",
|
||||
"output_dir": "outputs_search/bitfit.rte.t5-base/46821674",
|
||||
"overwrite_output_dir": true,
|
||||
"per_device_eval_batch_size": 64,
|
||||
"per_device_train_batch_size": 64,
|
||||
"predict_with_generate": true,
|
||||
"push_to_hub": false,
|
||||
"save_steps": 200,
|
||||
"save_strategy": "steps",
|
||||
"save_total_limit": 1,
|
||||
"seed": 100,
|
||||
"split_validation_test": true,
|
||||
"task_name": "rte",
|
||||
"test_dataset_config_name": [
|
||||
"en"
|
||||
],
|
||||
"test_dataset_name": "rte",
|
||||
"tokenizer_name": "t5-base",
|
||||
"warmup_steps": 500
|
||||
}
|
|
@ -0,0 +1,42 @@
|
|||
{
|
||||
"batch_size": 64,
|
||||
"dataset_config_name": [
|
||||
"en"
|
||||
],
|
||||
"delta_type": "bitfit",
|
||||
"do_eval": true,
|
||||
"do_test": true,
|
||||
"do_train": true,
|
||||
"eval_dataset_config_name": [
|
||||
"en"
|
||||
],
|
||||
"eval_dataset_name": "rte",
|
||||
"eval_steps": 200,
|
||||
"evaluation_strategy": "steps",
|
||||
"gradient_accumulation_steps": 1,
|
||||
"greater_is_better": true,
|
||||
"learning_rate": 0.008374542128252581,
|
||||
"load_best_model_at_end": true,
|
||||
"max_source_length": 128,
|
||||
"max_steps": 5000,
|
||||
"metric_for_best_model": "average_metrics",
|
||||
"model_name_or_path": "t5-base",
|
||||
"output_dir": "outputs_search/bitfit.rte.t5-base/47176009",
|
||||
"overwrite_output_dir": true,
|
||||
"per_device_eval_batch_size": 64,
|
||||
"per_device_train_batch_size": 64,
|
||||
"predict_with_generate": true,
|
||||
"push_to_hub": false,
|
||||
"save_steps": 200,
|
||||
"save_strategy": "steps",
|
||||
"save_total_limit": 1,
|
||||
"seed": 100,
|
||||
"split_validation_test": true,
|
||||
"task_name": "rte",
|
||||
"test_dataset_config_name": [
|
||||
"en"
|
||||
],
|
||||
"test_dataset_name": "rte",
|
||||
"tokenizer_name": "t5-base",
|
||||
"warmup_steps": 0
|
||||
}
|
|
@ -0,0 +1,42 @@
|
|||
{
|
||||
"batch_size": 64,
|
||||
"dataset_config_name": [
|
||||
"en"
|
||||
],
|
||||
"delta_type": "bitfit",
|
||||
"do_eval": true,
|
||||
"do_test": true,
|
||||
"do_train": true,
|
||||
"eval_dataset_config_name": [
|
||||
"en"
|
||||
],
|
||||
"eval_dataset_name": "rte",
|
||||
"eval_steps": 200,
|
||||
"evaluation_strategy": "steps",
|
||||
"gradient_accumulation_steps": 2,
|
||||
"greater_is_better": true,
|
||||
"learning_rate": 7.170024484707928e-05,
|
||||
"load_best_model_at_end": true,
|
||||
"max_source_length": 128,
|
||||
"max_steps": 5000,
|
||||
"metric_for_best_model": "average_metrics",
|
||||
"model_name_or_path": "t5-base",
|
||||
"output_dir": "outputs_search/bitfit.rte.t5-base/47432895",
|
||||
"overwrite_output_dir": true,
|
||||
"per_device_eval_batch_size": 64,
|
||||
"per_device_train_batch_size": 64,
|
||||
"predict_with_generate": true,
|
||||
"push_to_hub": false,
|
||||
"save_steps": 200,
|
||||
"save_strategy": "steps",
|
||||
"save_total_limit": 1,
|
||||
"seed": 100,
|
||||
"split_validation_test": true,
|
||||
"task_name": "rte",
|
||||
"test_dataset_config_name": [
|
||||
"en"
|
||||
],
|
||||
"test_dataset_name": "rte",
|
||||
"tokenizer_name": "t5-base",
|
||||
"warmup_steps": 500
|
||||
}
|
|
@ -0,0 +1,42 @@
|
|||
{
|
||||
"batch_size": 64,
|
||||
"dataset_config_name": [
|
||||
"en"
|
||||
],
|
||||
"delta_type": "bitfit",
|
||||
"do_eval": true,
|
||||
"do_test": true,
|
||||
"do_train": true,
|
||||
"eval_dataset_config_name": [
|
||||
"en"
|
||||
],
|
||||
"eval_dataset_name": "rte",
|
||||
"eval_steps": 200,
|
||||
"evaluation_strategy": "steps",
|
||||
"gradient_accumulation_steps": 1,
|
||||
"greater_is_better": true,
|
||||
"learning_rate": 0.00043437143218908386,
|
||||
"load_best_model_at_end": true,
|
||||
"max_source_length": 128,
|
||||
"max_steps": 5000,
|
||||
"metric_for_best_model": "average_metrics",
|
||||
"model_name_or_path": "t5-base",
|
||||
"output_dir": "outputs_search/bitfit.rte.t5-base/47615745",
|
||||
"overwrite_output_dir": true,
|
||||
"per_device_eval_batch_size": 64,
|
||||
"per_device_train_batch_size": 64,
|
||||
"predict_with_generate": true,
|
||||
"push_to_hub": false,
|
||||
"save_steps": 200,
|
||||
"save_strategy": "steps",
|
||||
"save_total_limit": 1,
|
||||
"seed": 100,
|
||||
"split_validation_test": true,
|
||||
"task_name": "rte",
|
||||
"test_dataset_config_name": [
|
||||
"en"
|
||||
],
|
||||
"test_dataset_name": "rte",
|
||||
"tokenizer_name": "t5-base",
|
||||
"warmup_steps": 500
|
||||
}
|
|
@ -0,0 +1,42 @@
|
|||
{
|
||||
"batch_size": 64,
|
||||
"dataset_config_name": [
|
||||
"en"
|
||||
],
|
||||
"delta_type": "bitfit",
|
||||
"do_eval": true,
|
||||
"do_test": true,
|
||||
"do_train": true,
|
||||
"eval_dataset_config_name": [
|
||||
"en"
|
||||
],
|
||||
"eval_dataset_name": "rte",
|
||||
"eval_steps": 200,
|
||||
"evaluation_strategy": "steps",
|
||||
"gradient_accumulation_steps": 2,
|
||||
"greater_is_better": true,
|
||||
"learning_rate": 0.00010014550655645348,
|
||||
"load_best_model_at_end": true,
|
||||
"max_source_length": 128,
|
||||
"max_steps": 5000,
|
||||
"metric_for_best_model": "average_metrics",
|
||||
"model_name_or_path": "t5-base",
|
||||
"output_dir": "outputs_search/bitfit.rte.t5-base/49804166",
|
||||
"overwrite_output_dir": true,
|
||||
"per_device_eval_batch_size": 64,
|
||||
"per_device_train_batch_size": 64,
|
||||
"predict_with_generate": true,
|
||||
"push_to_hub": false,
|
||||
"save_steps": 200,
|
||||
"save_strategy": "steps",
|
||||
"save_total_limit": 1,
|
||||
"seed": 100,
|
||||
"split_validation_test": true,
|
||||
"task_name": "rte",
|
||||
"test_dataset_config_name": [
|
||||
"en"
|
||||
],
|
||||
"test_dataset_name": "rte",
|
||||
"tokenizer_name": "t5-base",
|
||||
"warmup_steps": 500
|
||||
}
|
|
@ -0,0 +1,42 @@
|
|||
{
|
||||
"batch_size": 16,
|
||||
"dataset_config_name": [
|
||||
"en"
|
||||
],
|
||||
"delta_type": "bitfit",
|
||||
"do_eval": true,
|
||||
"do_test": true,
|
||||
"do_train": true,
|
||||
"eval_dataset_config_name": [
|
||||
"en"
|
||||
],
|
||||
"eval_dataset_name": "rte",
|
||||
"eval_steps": 200,
|
||||
"evaluation_strategy": "steps",
|
||||
"gradient_accumulation_steps": 1,
|
||||
"greater_is_better": true,
|
||||
"learning_rate": 6.867655291394631e-05,
|
||||
"load_best_model_at_end": true,
|
||||
"max_source_length": 128,
|
||||
"max_steps": 5000,
|
||||
"metric_for_best_model": "average_metrics",
|
||||
"model_name_or_path": "t5-base",
|
||||
"output_dir": "outputs_search/bitfit.rte.t5-base/52735972",
|
||||
"overwrite_output_dir": true,
|
||||
"per_device_eval_batch_size": 16,
|
||||
"per_device_train_batch_size": 16,
|
||||
"predict_with_generate": true,
|
||||
"push_to_hub": false,
|
||||
"save_steps": 200,
|
||||
"save_strategy": "steps",
|
||||
"save_total_limit": 1,
|
||||
"seed": 100,
|
||||
"split_validation_test": true,
|
||||
"task_name": "rte",
|
||||
"test_dataset_config_name": [
|
||||
"en"
|
||||
],
|
||||
"test_dataset_name": "rte",
|
||||
"tokenizer_name": "t5-base",
|
||||
"warmup_steps": 500
|
||||
}
|
|
@ -0,0 +1,42 @@
|
|||
{
|
||||
"batch_size": 64,
|
||||
"dataset_config_name": [
|
||||
"en"
|
||||
],
|
||||
"delta_type": "bitfit",
|
||||
"do_eval": true,
|
||||
"do_test": true,
|
||||
"do_train": true,
|
||||
"eval_dataset_config_name": [
|
||||
"en"
|
||||
],
|
||||
"eval_dataset_name": "rte",
|
||||
"eval_steps": 200,
|
||||
"evaluation_strategy": "steps",
|
||||
"gradient_accumulation_steps": 2,
|
||||
"greater_is_better": true,
|
||||
"learning_rate": 1.1566052650322366e-05,
|
||||
"load_best_model_at_end": true,
|
||||
"max_source_length": 128,
|
||||
"max_steps": 5000,
|
||||
"metric_for_best_model": "average_metrics",
|
||||
"model_name_or_path": "t5-base",
|
||||
"output_dir": "outputs_search/bitfit.rte.t5-base/56419593",
|
||||
"overwrite_output_dir": true,
|
||||
"per_device_eval_batch_size": 64,
|
||||
"per_device_train_batch_size": 64,
|
||||
"predict_with_generate": true,
|
||||
"push_to_hub": false,
|
||||
"save_steps": 200,
|
||||
"save_strategy": "steps",
|
||||
"save_total_limit": 1,
|
||||
"seed": 100,
|
||||
"split_validation_test": true,
|
||||
"task_name": "rte",
|
||||
"test_dataset_config_name": [
|
||||
"en"
|
||||
],
|
||||
"test_dataset_name": "rte",
|
||||
"tokenizer_name": "t5-base",
|
||||
"warmup_steps": 500
|
||||
}
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue