This commit is contained in:
shengdinghu 2022-04-14 11:22:41 +08:00
parent 5d2dab3284
commit 6ebfab4a12
690 changed files with 1860 additions and 500795 deletions

View File

@ -1,4 +1,4 @@
import numpy as np
from dataclasses import dataclass
from transformers import DataCollatorForSeq2Seq
@ -6,11 +6,23 @@ from transformers import DataCollatorForSeq2Seq
@dataclass
class TaskDataCollatorForSeq2Seq(DataCollatorForSeq2Seq):
def check_uniqueness(self, samples):
assert len(np.unique(samples)) == 1
def __call__(self, features):
# tasks = [d.pop('task') for d in features]
# self.check_uniqueness(tasks)
output = super().__call__(features)
# output["task"] = tasks[0]
return output
# class CustomDataCollator(DefaultDataCollator):
# def check_uniqueness(self, samples):
# assert len(np.unique(samples)) == 1
# def __call__(self, features):
# mask_positions = [d.pop('mask_positions') for d in features]
# # self.check_uniqueness(tasks)
# output = super().__call__(features)
# # output["task"] = tasks[0]
# return output

View File

@ -1,4 +1,4 @@
# from openprompt.prompts import ManualTemplate
class BasePrompt(object):
def __init__(self, template_id=0, verbalizer_id=0, generation=True):
@ -9,26 +9,28 @@ class BasePrompt(object):
self.verbalizer = self.mlmhead_verbalizers[verbalizer_id]
def __call__(self, example):
def eval_syntax(syntaxlist, example):
composed = []
for x in syntaxlist:
if x.startswith("[_eval_]"):
t = eval(x[len("[_eval_]"):])
else:
t = x
composed.append(t)
return composed
src_texts = eval_syntax(self.template,example)
tgt_texts = self.verbalizer[str(example['label'])]
if isinstance(tgt_texts, list):
tgt_texts = eval_syntax(tgt_texts, example)
else:
tgt_texts = [tgt_texts]
return src_texts, tgt_texts
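For clarity (not part of the diff): template entries prefixed with "[_eval_]" are evaluated as Python expressions against the current example, so a template composes its source text roughly as in the following illustrative sketch; the example dict and template below are made up.
example = {"sentence1": "A man is eating.", "sentence2": "Someone eats.", "label": "1"}
template = ["sentence1:", "[_eval_]example['sentence1']",
            "sentence2:", "[_eval_]example['sentence2']", "Meanings different or same? Answer: "]
# each "[_eval_]..." entry is replaced by the value of the expression after the prefix
composed = [eval(x[len("[_eval_]"):]) if x.startswith("[_eval_]") else x for x in template]
# composed == ['sentence1:', 'A man is eating.', 'sentence2:', 'Someone eats.', 'Meanings different or same? Answer: ']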
@ -48,7 +50,7 @@ class MRPCPrompt(BasePrompt):
"1": "same"
}
textual_templates = [
["sentence1:", """[_eval_]example['sentence1']""",
["sentence1:", """[_eval_]example['sentence1']""",
"sentence2:", """[_eval_]example["sentence2"]""", "Meanings different of same? Answer: " ]
]
@ -68,7 +70,7 @@ class BoolQPrompt(BasePrompt):
"1": "same"
}
textual_templates = [
["sentence1:", """[_eval_]example['sentence1']""",
["sentence1:", """[_eval_]example['sentence1']""",
"sentence2:", """[_eval_]example["sentence2"]""", "Meanings different of same? Answer: " ]
]
@ -84,7 +86,7 @@ class BoolQPrompt(BasePrompt):
"1": "yes"
}
textual_templates = [
["hypothesis:", """[_eval_]example['hypothesis']""",
["hypothesis:", """[_eval_]example['hypothesis']""",
"premise:", """[_eval_]example["premise"]""", "The answer was " ]
]
@ -100,7 +102,7 @@ class COLAPrompt(BasePrompt):
"1": "Yes"
}
textual_templates = [
["sentence:", """[_eval_]example['sentence']""",
["sentence:", """[_eval_]example['sentence']""",
"grammar correct? " ]
]
@ -119,7 +121,7 @@ class RTEPrompt(BasePrompt):
textual_templates = [
["sentence1:", """[_eval_]example['premise']""", "sentence2:",
"""[_eval_]example['hypothesis']""",
"The answer was " ]
"The answer was "]
]
class CBPrompt(BasePrompt):
@ -147,6 +149,5 @@ PromptCollections = {
"superglue-boolq": BoolQPrompt,
"cb": CBPrompt,
}

View File

@ -1,5 +1,5 @@
from collections import OrderedDict
import collections
import abc
import functools
from selectors import EpollSelector
@ -12,10 +12,213 @@ import logging
import numpy as np
import torch
import re
from examples_prompt.data_processors.prompt import PromptCollections
from openprompt.prompts import ManualTemplate, ManualVerbalizer
from openprompt.plms.utils import TokenizerWrapper
from openprompt.data_utils import InputExample
import itertools
logger = logging.getLogger(__name__)
from transformers.models.auto.tokenization_auto import tokenizer_class_from_name
from typing import List, Dict
from collections import defaultdict
from openprompt.utils import round_list
import warnings
class MLMTokenizerWrapper:
def __init__(self, max_seq_length, tokenizer, truncate_method):
self.max_seq_length=max_seq_length
self.tokenizer=tokenizer
self.num_special_tokens_to_add = len(tokenizer("")['input_ids'])
# from IPython import embed; embed(header="Truega")
self.truncate_method=truncate_method
self.total_passed_sentences = 0
self.num_truncated_sentences = 0
if truncate_method=='tail':
self.truncate_fct = self.truncate_from_tail
elif truncate_method=='head':
self.truncate_fct = self.truncate_from_head
elif truncate_method == 'balanced':
self.truncate_fct = self.balanced_truncate
else:
raise NotImplementedError
def merge_wrapped_example(self, wrapped_example, ):
''' # TODO: doesn't consider the situation where the input has two parts
'''
wrapped_example, others = wrapped_example
# for some datasets like SuperGLUE.COPA, the answer requires predicting a span of
# the input. Or in generation tasks, we need to generate a piece of target_text.
# In these cases, it is tokenized into encoded_tgt_text for future use.
encoder_inputs = defaultdict(list)
for piece in wrapped_example:
encode_text = self.tokenizer.encode(piece['text'], add_special_tokens=False, return_special_tokens_mask=True )
encoder_inputs['input_ids'].append(encode_text)
encoder_inputs['shortenable_ids'].append([piece['shortenable_ids']] * len(encode_text))
encoder_inputs = self.truncate(encoder_inputs=encoder_inputs)
encoder_inputs.pop("shortenable_ids")
encoder_inputs = self.concate_parts(input_dict=encoder_inputs)
decoded_inputs = self.tokenizer.decode(encoder_inputs['input_ids'], clean_up_tokenization_spaces=False)
# again_encode = self.tokenizer.encode(decoded_inputs, add_special_tokens=False, return_special_tokens_mask=True)
# if len(again_encode)> self.max_seq_length - 2:
# print("length exceed!")
# print(wrapped_example)
# print(encoder_inputs['input_ids'])
# print(again_encode)
# print(decoded_inputs)
# exit()
# delete shortenable ids
# encoder_inputs = self.concate_parts(input_dict=encoder_inputs)
# encoder_inputs = self.add_special_tokens(encoder_inputs=encoder_inputs)
# # create special input ids
# encoder_inputs['attention_mask'] = [1] *len(encoder_inputs['input_ids'])
# # padding
# encoder_inputs = self.padding(input_dict=encoder_inputs, max_len=self.max_seq_length, pad_id_for_inputs=self.tokenizer.pad_token_id)
return decoded_inputs
@staticmethod
def balanced_truncate(input_dict: Dict,
num_tokens_to_truncate: int=0) -> Dict:
'''Truncate the inputs in a balanced way: the number of tokens cut from each part is proportional to that part's length.
'''
shortenable_lens = [len(parts) if parts[0]==1 else 0
for parts in input_dict['shortenable_ids']]
total_shortenable_len = sum(shortenable_lens)
num_tokens_to_truncate_each_part = [part_len/total_shortenable_len*num_tokens_to_truncate
for part_len in shortenable_lens]
round_list(num_tokens_to_truncate_each_part, num_tokens_to_truncate)
truncated_example = defaultdict(list)
for key in input_dict:
parts = input_dict[key]
for num_tokens_to_truncate_part, part in zip(num_tokens_to_truncate_each_part, parts):
truncated_example[key].append(part[:len(part)-num_tokens_to_truncate_part])
return truncated_example
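A rough worked example of the proportional split above (numbers are made up; round_list is assumed to round the per-part floats so they still sum to num_tokens_to_truncate):
shortenable_lens = [30, 0, 10]       # only shortenable parts contribute
num_tokens_to_truncate = 8
per_part = [l / sum(shortenable_lens) * num_tokens_to_truncate for l in shortenable_lens]
# per_part == [6.0, 0.0, 2.0]; after rounding, the first part loses 6 tokens and the last loses 2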
@staticmethod
def truncate_from_tail(input_dict: Dict,
num_tokens_to_truncate: int=0) -> Dict:
r"""truncate the inputs from the rear
"""
truncated_example = defaultdict(list)
shortenable_ids = input_dict['shortenable_ids']
for key in input_dict:
parts = input_dict[key]
to_trunc = num_tokens_to_truncate
for i, part in enumerate(parts[::-1]):
if len(part) == 0: # skip parts that are empty after tokenization
continue
if shortenable_ids[-1-i][0]==0: # ==0 means the part is not shortenable
continue
parts[-1-i] = part[:-to_trunc] if to_trunc<len(part) else []
to_trunc -= len(part)
if to_trunc <= 0:
break
truncated_example[key] = parts
return truncated_example
@staticmethod
def truncate_from_head(input_dict: Dict,
num_tokens_to_truncate: int=0) -> Dict:
r"""truncate the inputs from the head
"""
truncated_example = defaultdict(list)
shortenable_ids = input_dict['shortenable_ids']
for key in input_dict:
parts = input_dict[key]
to_trunc = num_tokens_to_truncate
for i, part in enumerate(parts):
if shortenable_ids[i][0]==0: # ==0 means the part is not shortenable
continue
parts[i] = part[:-to_trunc] if to_trunc<len(part) else []
to_trunc -= len(part)
if to_trunc <= 0:
break
truncated_example[key] = parts
return truncated_example
@staticmethod
def concate_parts(input_dict: Dict) -> Dict:
for key in input_dict:
input_dict[key] = list(itertools.chain(*input_dict[key]))
return input_dict
# @staticmethod
# def padding(input_dict: Dict,
# max_len: int, pad_id_for_inputs: int=0, pad_id_for_others: int=0) -> None:
# for key, value in input_dict.items():
# if (len(input_dict[key]) > max_len):
# raise ValueError(f'''
# Truncated seq length of '{key}' still greater than max length '{max_len}'.
# One possible reason is that there are not enough shortenable parts in the template. Try adding the {{"shortenable": "True"}} property.
# ''')
# if 'input' in key:
# input_dict[key].extend([pad_id_for_inputs]*(max_len-len(value)))
# else:
# input_dict[key].extend([pad_id_for_others]*(max_len-len(value)))
# return input_dict
# def add_special_tokens(self, encoder_inputs):
# # add special tokens
# for key in encoder_inputs:
# if key == "input_ids":
# with warnings.catch_warnings():
# warnings.simplefilter("ignore")
# encoder_inputs[key] = self.tokenizer.build_inputs_with_special_tokens(
# encoder_inputs[key])
# return encoder_inputs
def truncate(self, encoder_inputs):
total_tokens = sum([len(part) for part in encoder_inputs['input_ids']])
num_specials = self.num_special_tokens_to_add
# print("num_specials", num_specials)
num_tokens_to_truncate = total_tokens - self.max_seq_length + num_specials
self.total_passed_sentences+=1
if num_tokens_to_truncate>0:
self.num_truncated_sentences += 1
if num_tokens_to_truncate > sum([len(x) for x in encoder_inputs['shortenable_ids']]):
raise RuntimeError("num_tokens_to_truncate larger than number of shortenable tokens.")
encoder_inputs = self.truncate_fct(input_dict=encoder_inputs,
num_tokens_to_truncate=num_tokens_to_truncate)
return encoder_inputs
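Sketch of the truncation budget computed in truncate() above, with assumed sizes:
total_tokens, max_seq_length, num_specials = 140, 128, 2   # assumed sizes
num_tokens_to_truncate = total_tokens - max_seq_length + num_specials
# 140 - 128 + 2 == 14 tokens must be removed from the shortenable parts by truncate_fct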
class AbstractTask(abc.ABC):
name = NotImplemented
config = NotImplemented
@ -28,26 +231,44 @@ class AbstractTask(abc.ABC):
{"train": "train", "validation": "validation", "test": "test"}
small_datasets_without_all_splits = ["cola", "wnli", "rte", "superglue-cb", "superglue-copa", "superglue-multirc",
"superglue-wic", "superglue-wsc.fixed", "superglue-rte", "mrpc", "stsb",
"superglue-boolq"]
large_data_without_all_splits = ["qqp", "qnli", "superglue-record", "sst2"]
"superglue-boolq", "qqp", "qnli", "superglue-record", "sst2"]
large_data_without_all_splits = [] #["qqp", "qnli", "superglue-record", "sst2"]
def __init__(self, config, seed=42):
def __init__(self, config, data_args, tokenizer, predict_with_generate, seed=42, default_max_length=1):
self.config = config
self.seed = seed
self.data_args = data_args
self.tokenizer = tokenizer
self.predict_with_generate = predict_with_generate
self.default_max_length = default_max_length
self.truncate_method = getattr(data_args, "truncate_method", "balanced")
tid = getattr(config, "template_id", 0)
vid = getattr(config, "verbalizer_id", 0)
generation_paradigm = getattr(config, "generation_paradigm", True)
self.prompt = PromptCollections[self.name](tid, vid, generation_paradigm)
def get_max_target_length(self, tokenizer, default_max_length):
if self.prompt.verbalizer is not None:
return max([len(tokenizer.encode(label)) for key, label in self.prompt.verbalizer.items()])
return default_max_length
self.template = ManualTemplate(tokenizer=self.tokenizer, text = self.templates_text[tid])
self.verbalizer = ManualVerbalizer(tokenizer=self.tokenizer, classes = self.labels_list, label_words=self.verbalizers[vid])
# if self.predict_with_generate:
# self.reverse_verbalizer = {(int(x) for x in self.tokenizer.convert_tokens_to_ids(self.tokenizer.tokenize(self.verbalizer[label]))): label for label in self.labels_list}
# else:
# self.reverse_verbalizer = {int(self.tokenizer.convert_tokens_to_ids(self.tokenizer.tokenize(self.verbalizer[label]))[0]): label for label in self.labels_list}
self.tokenizer_wrapper = MLMTokenizerWrapper(max_seq_length=self.data_args.max_source_length, tokenizer=self.tokenizer, truncate_method=self.truncate_method)
generation_paradigm = getattr(config, "generation_paradigm", True)
# self.prompt = PromptCollections[self.name](tid, vid, generation_paradigm)
self.max_target_length = self.get_max_target_length(self.default_max_length)
def get_max_target_length(self, default_max_length):
if self.predict_with_generate:
return max([len(label) for key, label in self.verbalizer.label_words_ids.items()])
else:
return default_max_length
def seq2seq_format(self, source, target, extra_fields={}
):
return {'source': ' '.join(source),
'target': ' '.join(target),
'task': self.name,
@ -59,7 +280,7 @@ class AbstractTask(abc.ABC):
n_obs = total_size
logger.warning("n_obs is set to %s", n_obs)
return n_obs
def shuffled_indices(self, dataset):
num_samples = len(dataset)
generator = torch.Generator()
@ -91,19 +312,42 @@ class AbstractTask(abc.ABC):
else:
return indices[validation_size:]
def map_dataset(self, dataset, add_prefix):
# from IPython import embed; embed(header="in get target length")
return dataset.map(self.preprocessor)
def map_dataset(self, dataset):
# from IPython import embed; embed(header="in get target length")
return dataset.map(self.preprocessor).map(self.tokenizer_preprocessor)
def preprocessor(self, example):
source, target = self.prompt(example)
return self.seq2seq_format(source, target, extra_fields={})
def get(self, split, add_prefix=True, n_obs=None, split_validation_test=False):
return example
def tokenizer_preprocessor(self, example):
# source, target = example
# from IPython import embed; embed(header="Trehre2")
label = example['label']
guid = example['idx']
meta = dict(example)
meta.pop("label")
meta.pop("idx")
# from IPython import embed; embed(header="Trehre2")
e = InputExample(**{"meta": meta, 'label': label, 'guid': guid})
template_e = self.template.wrap_one_example(e)
encoded_sentence = self.tokenizer_wrapper.merge_wrapped_example(template_e)
if self.predict_with_generate:
# return {"source": encoded_sentence, 'target': ', 'extra_fields':[]}
raise NotImplementedError
else:
return {"source": encoded_sentence, "label": label, 'target': '', 'extra_fields':{'dataset_name':self.name}}
def get(self, split, n_obs=None, split_validation_test=False):
# For small datasets (n_samples < 10K) without test set, we divide validation set to
# half, use one half as test set and one half as validation set.
if split in ["eval", "dev", "valid"]:
split = "validation"
if split_validation_test and self.name in self.small_datasets_without_all_splits \
and split != "train":
mapped_split = self.split_to_data_split["validation"]
@ -124,7 +368,7 @@ class AbstractTask(abc.ABC):
# shuffles the data and samples it.
if n_obs is not None:
dataset = self.subsample(dataset, n_obs)
return self.map_dataset(dataset, add_prefix)
return self.map_dataset(dataset)
class Squad(AbstractTask):
name = "squad"
@ -190,6 +434,10 @@ class SST2(AbstractTask):
"validation": "validation",
"test": "validation"}
verbalizers = [
]
def load_dataset(self, split):
return datasets.load_dataset('glue', 'sst2',
split=split, script_version="master")
@ -278,6 +526,7 @@ class QNLI(AbstractTask):
tgt_texts = [str(example['label'])]
return self.seq2seq_format(src_texts, tgt_texts, add_prefix)
#Tested
class RTE(AbstractTask):
name = "rte"
labels_list = ["0", "1"]
@ -287,15 +536,56 @@ class RTE(AbstractTask):
"validation": "validation",
"test": "validation"}
templates_text = [
"""sentence1: {"meta": 'sentence1', "shortenable":True}. sentence2:,"""+
"""{"meta":"sentence2", "shortenable":True}. The answer was {"mask"}.""",
]
verbalizers = [{
"0": "yes",
"1": "no"
}]
def load_dataset(self, split):
return datasets.load_dataset('glue', 'rte',
if self.data_args.datasets_load_from_disk:
return datasets.load_from_disk(f"{self.data_args.datasets_saved_path}/glue.rte")[split]
else:
return datasets.load_dataset('glue', 'rte',
split=split, script_version="master")
def preprocessor(self, example, add_prefix=True):
src_texts = ["sentence1:", example['sentence1'],
"sentence2:", example["sentence2"]]
tgt_texts = [str(example['label'])]
return self.seq2seq_format(src_texts, tgt_texts, add_prefix)
#Tested
class SuperGLUEBoolQ(AbstractTask):
name="superglue-boolq"
labels_list = ['0', '1']
metric = [metrics.accuracy]
metric_names = ["accuracy"]
split_to_data_split = {"train": "train",
"validation": "validation",
"test": "validation"}
verbalizers = [
{
"0": "no",
"1": "yes"
},
]
mlmhead_verbalizers = {
"0": "no",
"1": "yes"
}
templates_text = [
"""hypothesis: {"meta": "question", "shortenable":True} premise: {"meta":"passage", "shortenable":True} The answer was {"mask"}."""
]
def load_dataset(self, split):
if self.data_args.datasets_load_from_disk:
return datasets.load_from_disk(f"{self.data_args.datasets_saved_path}/super_glue.boolq")[split]
else:
return datasets.load_dataset('super_glue', 'boolq', split=split, script_version="master")
class WNLI(AbstractTask):
@ -307,53 +597,23 @@ class WNLI(AbstractTask):
"validation": "validation",
"test": "validation"}
def load_dataset(self, split):
return datasets.load_dataset('glue', 'wnli', split=split, script_version="master")
verbalizers = [{
"0": "True",
"1": "False",
}]
templates_text = [
"""{"meta": 'sentence1',"shortenable":True} Does it mean the following: "{"meta":'sentence2'}"? {"mask"}."""
]
def preprocessor(self, example, add_prefix=True):
src_texts = ["sentence1:", example['sentence1'],
"sentence2:", example["sentence2"]]
tgt_texts = [str(example['label'])]
return self.seq2seq_format(src_texts, tgt_texts, add_prefix)
class SuperGLUEBoolQ(AbstractTask):
name="superglue-boolq"
labels_list = ['0', '1']
metric = [metrics.accuracy]
metric_names = ["accuracy"]
split_to_data_split = {"train": "train",
"validation": "validation",
"test": "validation"}
def load_dataset(self, split):
return datasets.load_dataset('super_glue', 'boolq', split=split, script_version="master")
def preprocessor(self, example, add_prefix=True):
src_texts = ["question:", example["question"], "passage:", example["passage"]]
tgt_texts = [str(example["label"])]
return self.seq2seq_format(src_texts, tgt_texts, add_prefix)
class SuperGLUERTE(AbstractTask):
name="superglue-rte"
labels_list = ['0', '1']
split_to_data_split = {"train": "train",
"validation": "validation",
"test": "validation"}
metric = [metrics.accuracy]
metric_names = ["accuracy"]
def load_dataset(self, split):
return datasets.load_dataset('super_glue', 'rte', split=split, script_version="master")
def preprocessor(self, example, add_prefix=True):
src_texts = ["premise:", example["premise"],
"hypothesis:", example["hypothesis"]]
tgt_texts = [str(example["label"])]
return self.seq2seq_format(src_texts, tgt_texts, add_prefix)
if self.data_args.datasets_load_from_disk:
return datasets.load_from_disk(f"{self.data_args.datasets_saved_path}/glue.wnli")[split]
else:
return datasets.load_dataset('glue', 'wnli', split=split, script_version="master")
#
class SuperGLUECB(AbstractTask):
name = "superglue-cb"
labels_list = ['0', '1', '2']
@ -363,13 +623,20 @@ class SuperGLUECB(AbstractTask):
metric = [metrics.mean_multiclass_f1(num_classes=3), metrics.accuracy]
metric_names = ["f1_multiclass", "accuracy"]
def load_dataset(self, split):
return datasets.load_dataset('super_glue', 'cb', split=split, script_version="master")
verbalizers = [{
"0": "yes",
"1": "no",
"2": "maybe"
}]
templates_text = [
"""hypothesis: {"meta": 'hypothesis',"shortenable":True} premise: {"meta":'premise', "shortenable":True} The answer was {"mask"}."""
]
def preprocessor(self, example, add_prefix=True):
src_texts = ["premise:", example["premise"], "hypothesis:", example["hypothesis"]]
tgt_texts = [str(example["label"])]
return self.seq2seq_format(src_texts, tgt_texts, add_prefix)
def load_dataset(self, split):
if self.data_args.datasets_load_from_disk:
return datasets.load_from_disk(f"{self.data_args.datasets_saved_path}/super_glue.cb")[split]
else:
return datasets.load_dataset('super_glue', 'cb', split=split, script_version="master")
class SuperGLUECOPA(AbstractTask):
@ -379,17 +646,21 @@ class SuperGLUECOPA(AbstractTask):
"validation": "validation",
"test": "validation"}
metric = [metrics.accuracy]
metric_names = ["accuracy"]
metric_names = ["accuracy"]
verbalizers = [{
"0": "1",
"1": "2",
}]
templates_text = [
"""choice1: {"meta":"choice1"} choice2: {"meta":"choice2"} premise: {"meta":"premise", "shortenable":True} The {"meta":"question"} answer was choice{"mask"}."""
]
def load_dataset(self, split):
return datasets.load_dataset('super_glue', 'copa', split=split, script_version="master")
def preprocessor(self, example, add_prefix=True):
src_texts = ["premise:", example["premise"],
"choice1:", example["choice1"],
"choice2:", example["choice2"]]
tgt_texts = [str(example["label"])]
return self.seq2seq_format(src_texts, tgt_texts, add_prefix)
if self.data_args.datasets_load_from_disk:
return datasets.load_from_disk(f"{self.data_args.datasets_saved_path}/super_glue.copa")[split]
else:
return datasets.load_dataset('super_glue', 'copa', split=split, script_version="master")
class SuperGLUEMultiRC(AbstractTask):
@ -398,31 +669,47 @@ class SuperGLUEMultiRC(AbstractTask):
split_to_data_split = {"train": "train",
"validation": "validation",
"test": "validation"}
metric = [metrics.multirc_f1_over_all_answers,
metrics.mean_group_metric(metrics.exact_match)]
metric = [metrics.f1_score,
metrics.accuracy]
metric_names = ["f1", "em"]
# generation_verbalizers = [{
# "0": "no",
# "1": "yes",
# },
# ]
verbalizers = [{
"0": "no",
"1": "yes",
}]
templates_text = [
"""question: {"meta":"question", "shortenable":False} answer: {"meta":"answer", "shortenable":False, "post_processing": lambda x:x+"."} paragraph: {"meta":"paragraph", "shortenable":True} The answer was {"mask"}."""
]
def load_dataset(self, split):
return datasets.load_dataset('super_glue', 'multirc', split=split, script_version="master")
if self.data_args.datasets_load_from_disk:
return datasets.load_from_disk(f"{self.data_args.datasets_saved_path}/super_glue.multirc")[split]
else:
return datasets.load_dataset('super_glue', 'multirc', split=split, script_version="master")
def remove_markup(self, text):
"""Removes the HTML markup."""
text = re.sub('<br>', ' ', text)
text = re.sub('<(/)?b>', '', text)
return text
def preprocessor(self, example, add_prefix=True):
group = example['idx']['question']
# T5 applies remove_markup to the joined string, but this should not make
def preprocessor(self, example):
# T5 applies remove_markup to the joined string, but this should not make
# any difference as well.
# https://github.com/google-research/text-to-text-transfer-transformer/blob/a1352e625db7ec114062f99d99b0565b9e45c155/t5/data/preprocessors.py#L797
src_texts = ["question:", self.remove_markup(example["question"]),
"answer:", self.remove_markup(example["answer"]),
"paragraph:", self.remove_markup(example["paragraph"])]
tgt_texts = [str(example["label"])]
return self.seq2seq_format(src_texts, tgt_texts, add_prefix, extra_fields={"group": group})
# https://github.com/google-research/text-to-text-transfer-transformer/blob/a1352e625db7ec114062f99d99b0565b9e45c155/t5/data/preprocessors.py#L797
example["question"] = self.remove_markup(example["question"])
example["answer"] = self.remove_markup(example["answer"])
example["paragraph"] = self.remove_markup(example["paragraph"])
return example
class SuperGLUEWIC(AbstractTask):
name = "superglue-wic"
@ -431,68 +718,75 @@ class SuperGLUEWIC(AbstractTask):
"validation": "validation",
"test": "validation"}
metric = [metrics.accuracy]
metric_names = ["accuracy"]
metric_names = ["accuracy"]
verbalizers = [{
"0": "No",
"1": "Yes",
}]
templates_text = [
"""sentence1: {"meta":"sentence1"} sentence2: {"meta":"sentence2", "shortenable": True} word: {"meta":"word"} {"mask"}.
"""
]
def load_dataset(self, split):
return datasets.load_dataset('super_glue', 'wic', split=split, script_version="master")
def preprocessor(self, example, add_prefix=True):
src_texts = ["sentence1:", example["sentence1"],
"sentence2:", example["sentence2"],
"word:", example["word"]]
tgt_texts = [str(example["label"])]
return self.seq2seq_format(src_texts, tgt_texts, add_prefix)
if self.data_args.datasets_load_from_disk:
return datasets.load_from_disk(f"{self.data_args.datasets_saved_path}/super_glue.wic")[split]
else:
return datasets.load_dataset('super_glue', 'wic', split=split, script_version="master")
class SuperGLUEWSCFixed(AbstractTask):
# source: https://github.com/google-research/text-to-text-transfer-transformer/blob/master/t5/data/preprocessors.py
"""Convert WSC examples to text2text format.
WSC includes a sentence along with 2 'spans': the first denoting a noun and
the other a pronoun. The 'label' specifies whether or not the pronoun is
referencing the noun. This preprocessor puts ' * ' around the noun and ' # '
around the pronoun.
For example, a typical example from WSC might look like
{
'text': 'This is a test sentence .',
'span1_text': 'test',
'span1_index': 3,
'span2_text': 'This',
'span2_index': 0,
'label': 0
}
This example would be transformed to
{
'inputs': 'wsc text: # This # is a * test * sentence .',
'targets': 'False'
}
"""
name = "superglue-wsc.fixed"
labels_list = ['0', '1']
split_to_data_split = {"train": "train",
"validation": "validation",
"test": "validation"}
metric = [metrics.accuracy]
metric_names = ["accuracy"]
def load_dataset(self, split):
return datasets.load_dataset('super_glue', 'wsc.fixed', split=split, script_version="master")
# class SuperGLUEWSCFixed(AbstractTask):
# # source: https://github.com/google-research/text-to-text-transfer-transformer/blob/master/t5/data/preprocessors.py
# """Convert WSC examples to text2text format.
# WSC includes a sentence along with 2 'spans': the first denoting a noun and
# the other a pronoun. The 'label' specifies whether or not the pronoun is
# referencing the noun. This preprocessor puts ' * ' around the noun and ' # '
# around the pronoun.
# For example, a typical example from WSC might look like
# {
# 'text': 'This is a test sentence .',
# 'span1_text': 'test',
# 'span1_index': 3,
# 'span2_text': 'This',
# 'span2_index': 0,
# 'label': 0
# }
# This example would be transformed to
# {
# 'inputs': 'wsc text: # This # is a * test * sentence .',
# 'targets': 'False'
# }
# """
# name = "superglue-wsc.fixed"
# labels_list = ['0', '1']
# split_to_data_split = {"train": "train",
# "validation": "validation",
# "test": "validation"}
# metric = [metrics.accuracy]
# metric_names = ["accuracy"]
def _mark_span(self, text, span_str, span_idx, mark):
pattern_tmpl = r'^((?:\S+\s){N})(W)'
pattern = re.sub('N', str(span_idx), pattern_tmpl)
pattern = re.sub('W', span_str, pattern)
return re.sub(pattern, r'\1{0} \2 {0}'.format(mark), text)
# def load_dataset(self, split):
# return datasets.load_dataset('super_glue', 'wsc.fixed', split=split, script_version="master")
def preprocessor(self, example, add_prefix=True):
# converts text as done in T5.
text = example['text']
text = self._mark_span(text, example['span1_text'], example['span1_index'], '*')
# Compensate for 2 added "words" added in previous step.
span2_index = example['span2_index'] + 2 * int(example['span1_index'] < example['span2_index'])
text = self._mark_span(text, example['span2_text'], span2_index, '#')
src_texts = ["text:", text]
tgt_texts = [str(example["label"])]
return self.seq2seq_format(src_texts, tgt_texts, add_prefix)
# def _mark_span(self, text, span_str, span_idx, mark):
# pattern_tmpl = r'^((?:\S+\s){N})(W)'
# pattern = re.sub('N', str(span_idx), pattern_tmpl)
# pattern = re.sub('W', span_str, pattern)
# return re.sub(pattern, r'\1{0} \2 {0}'.format(mark), text)
# def preprocessor(self, example, add_prefix=True):
# # converts text as done in T5.
# text = example['text']
# text = self._mark_span(text, example['span1_text'], example['span1_index'], '*')
# # Compensate for 2 added "words" added in previous step.
# span2_index = example['span2_index'] + 2 * int(example['span1_index'] < example['span2_index'])
# text = self._mark_span(text, example['span2_text'], span2_index, '#')
# src_texts = ["text:", text]
# tgt_texts = [str(example["label"])]
# return self.seq2seq_format(src_texts, tgt_texts, add_prefix)
class SuperGLUERecord(AbstractTask):
@ -526,8 +820,8 @@ class SuperGLUERecord(AbstractTask):
"validation": "validation",
"test": "validation"}
metric = [metrics.squad]
metric_names = ["squad"]
metric_names = ["squad"]
def load_dataset(self, split):
return datasets.load_dataset('super_glue', 'record', split=split, script_version="master")
@ -542,18 +836,18 @@ class SuperGLUERecord(AbstractTask):
passage = re.sub(r'\n@highlight\n', '. ', passage)
inputs = f"record query: {ex['query']} entities: {', '.join(ex['entities'])} passage: {passage}"
if add_prefix:
inputs = self.name + " " + inputs
inputs = self.name + " " + inputs
# duplicates the samples based on number of answers.
num_answers = len(ex["answers"])
num_duplicates = np.maximum(1, num_answers)
new_batch["source"].extend([inputs] * num_duplicates)
new_batch["source"].extend([inputs] * num_duplicates)
new_batch["target"].extend(ex["answers"] if num_answers > 0 else ["<unk>"])
new_batch["task"].extend([self.name] * num_duplicates)
new_batch["extra_fields"].extend([{"answers": ex["answers"]}]*num_duplicates)
new_batch["extra_fields"].extend([{"answers": ex["answers"]}]*num_duplicates)
return new_batch
def map_dataset(self, dataset, add_prefix=True):
return dataset.map(functools.partial(self.preprocessor, add_prefix=add_prefix),
batched=True, remove_columns=dataset.column_names)
@ -570,21 +864,20 @@ TASK_MAPPING = OrderedDict(
('qqp', QQP),
('stsb', STSB),
('superglue-boolq', SuperGLUEBoolQ),
('superglue-rte', SuperGLUERTE),
('superglue-cb', SuperGLUECB),
('superglue-copa', SuperGLUECOPA),
('superglue-multirc', SuperGLUEMultiRC),
('superglue-wic', SuperGLUEWIC),
('superglue-wsc.fixed', SuperGLUEWSCFixed),
# ('superglue-wsc.fixed', SuperGLUEWSCFixed),
('superglue-record', SuperGLUERecord)
]
)
class AutoTask:
@classmethod
def get(self, task, config, seed=42):
def get(self, task, config, data_args, tokenizer,predict_with_generate, seed=42):
if task in TASK_MAPPING:
return TASK_MAPPING[task](config, seed)
return TASK_MAPPING[task](config, data_args, tokenizer,predict_with_generate, seed)
raise ValueError(
"Unrecognized task {} for AutoTask Model: {}.\n"
"Task name should be one of {}.".format(

View File

@ -45,12 +45,51 @@ def spearman_corrcoef(predictions, targets) -> dict:
spearman_corrcoef = 0
return {"spearmanr": spearman_corrcoef}
def spearman_corrcoef(predictions, targets) -> dict:
"""Computes Spearman correlation coefficient."""
# TODO: we need to do postprocessors in a clean way for each dataset.
from examples_seq2seq.data_processors.postprocessors import string_to_float
targets = [string_to_float(target) for target in targets]
predictions= [string_to_float(prediction) for prediction in predictions]
spearman_corrcoef = 100 * scipy.stats.spearmanr(targets, predictions)[0]
# Note that if all the predictions are the same, the Spearman
# correlation is NaN; to guard against this, we check the output
# and return 0 in this case.
if math.isnan(spearman_corrcoef):
spearman_corrcoef = 0
return {"spearmanr": spearman_corrcoef}
def f1_score_with_invalid(predictions, targets) -> dict:
"""Computes F1 score, with any prediction != 0 or 1 is counted as incorrect.
Args:
targets: list of targets, either 0 or 1
predictions: list of predictions, any integer value
Returns:
F1 score, where any prediction != 0 or 1 is counted as wrong.
"""
def binary_reverse(labels):
return ['0' if label == '1' else '1' for label in labels]
targets, predictions = np.asarray(targets), np.asarray(predictions)
# Get indices of invalid predictions.
invalid_idx_mask = np.logical_and(predictions != '0', predictions != '1')
# For any prediction != 0 or 1, we set the prediction to the opposite of its corresponding target.
predictions[invalid_idx_mask] = binary_reverse(targets[invalid_idx_mask])
targets = targets.astype(np.int32)
predictions = predictions.astype(np.int32)
return {"f1": 100 * sklearn.metrics.f1_score(targets, predictions)}
def transform_for_generation(predictions, targets):
mapping = {k: i for i, k in enumerate(set(targets))}
targets = np.asarray([mapping[k] for k in targets])
predictions = np.asarray([mapping[k] if k in mapping else (t+1)%len(mapping) for t, k in zip(targets, predictions)])
return predictions, targets
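Toy usage of transform_for_generation (assumed labels): generated strings are mapped to integer ids drawn from the target label set, and any prediction outside that set is mapped to an id that differs from its target so it is scored as wrong.
preds, targs = transform_for_generation(["yes", "maybe"], ["yes", "no"])
# "yes" maps to the same id as its target; "maybe" is unknown and becomes (target_id + 1) % 2,
# which never equals the corresponding target id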

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

@ -1,42 +0,0 @@
{
"batch_size": 64,
"dataset_config_name": [
"en"
],
"delta_type": "bitfit",
"do_eval": true,
"do_test": true,
"do_train": true,
"eval_dataset_config_name": [
"en"
],
"eval_dataset_name": "mrpc",
"eval_steps": 200,
"evaluation_strategy": "steps",
"gradient_accumulation_steps": 1,
"greater_is_better": true,
"learning_rate": 0.0002561697332863371,
"load_best_model_at_end": true,
"max_source_length": 128,
"max_steps": 5000,
"metric_for_best_model": "average_metrics",
"model_name_or_path": "t5-base",
"output_dir": "outputs_search/bitfit.mrpc.t5-base/10940816",
"overwrite_output_dir": true,
"per_device_eval_batch_size": 64,
"per_device_train_batch_size": 64,
"predict_with_generate": true,
"push_to_hub": false,
"save_steps": 200,
"save_strategy": "steps",
"save_total_limit": 1,
"seed": 100,
"split_validation_test": true,
"task_name": "mrpc",
"test_dataset_config_name": [
"en"
],
"test_dataset_name": "mrpc",
"tokenizer_name": "t5-base",
"warmup_steps": 500
}

View File

@ -1,42 +0,0 @@
{
"batch_size": 32,
"dataset_config_name": [
"en"
],
"delta_type": "bitfit",
"do_eval": true,
"do_test": true,
"do_train": true,
"eval_dataset_config_name": [
"en"
],
"eval_dataset_name": "mrpc",
"eval_steps": 200,
"evaluation_strategy": "steps",
"gradient_accumulation_steps": 1,
"greater_is_better": true,
"learning_rate": 0.0017750209757755706,
"load_best_model_at_end": true,
"max_source_length": 128,
"max_steps": 5000,
"metric_for_best_model": "average_metrics",
"model_name_or_path": "t5-base",
"output_dir": "outputs_search/bitfit.mrpc.t5-base/1107862",
"overwrite_output_dir": true,
"per_device_eval_batch_size": 32,
"per_device_train_batch_size": 32,
"predict_with_generate": true,
"push_to_hub": false,
"save_steps": 200,
"save_strategy": "steps",
"save_total_limit": 1,
"seed": 100,
"split_validation_test": true,
"task_name": "mrpc",
"test_dataset_config_name": [
"en"
],
"test_dataset_name": "mrpc",
"tokenizer_name": "t5-base",
"warmup_steps": 0
}

View File

@ -1,42 +0,0 @@
{
"batch_size": 16,
"dataset_config_name": [
"en"
],
"delta_type": "bitfit",
"do_eval": true,
"do_test": true,
"do_train": true,
"eval_dataset_config_name": [
"en"
],
"eval_dataset_name": "mrpc",
"eval_steps": 200,
"evaluation_strategy": "steps",
"gradient_accumulation_steps": 1,
"greater_is_better": true,
"learning_rate": 8.499916262600587e-05,
"load_best_model_at_end": true,
"max_source_length": 128,
"max_steps": 5000,
"metric_for_best_model": "average_metrics",
"model_name_or_path": "t5-base",
"output_dir": "outputs_search/bitfit.mrpc.t5-base/15328099",
"overwrite_output_dir": true,
"per_device_eval_batch_size": 16,
"per_device_train_batch_size": 16,
"predict_with_generate": true,
"push_to_hub": false,
"save_steps": 200,
"save_strategy": "steps",
"save_total_limit": 1,
"seed": 100,
"split_validation_test": true,
"task_name": "mrpc",
"test_dataset_config_name": [
"en"
],
"test_dataset_name": "mrpc",
"tokenizer_name": "t5-base",
"warmup_steps": 500
}

View File

@ -1,42 +0,0 @@
{
"batch_size": 32,
"dataset_config_name": [
"en"
],
"delta_type": "bitfit",
"do_eval": true,
"do_test": true,
"do_train": true,
"eval_dataset_config_name": [
"en"
],
"eval_dataset_name": "mrpc",
"eval_steps": 200,
"evaluation_strategy": "steps",
"gradient_accumulation_steps": 1,
"greater_is_better": true,
"learning_rate": 0.0006091646696452159,
"load_best_model_at_end": true,
"max_source_length": 128,
"max_steps": 5000,
"metric_for_best_model": "average_metrics",
"model_name_or_path": "t5-base",
"output_dir": "outputs_search/bitfit.mrpc.t5-base/15991793",
"overwrite_output_dir": true,
"per_device_eval_batch_size": 32,
"per_device_train_batch_size": 32,
"predict_with_generate": true,
"push_to_hub": false,
"save_steps": 200,
"save_strategy": "steps",
"save_total_limit": 1,
"seed": 100,
"split_validation_test": true,
"task_name": "mrpc",
"test_dataset_config_name": [
"en"
],
"test_dataset_name": "mrpc",
"tokenizer_name": "t5-base",
"warmup_steps": 0
}

View File

@ -1,42 +0,0 @@
{
"batch_size": 16,
"dataset_config_name": [
"en"
],
"delta_type": "bitfit",
"do_eval": true,
"do_test": true,
"do_train": true,
"eval_dataset_config_name": [
"en"
],
"eval_dataset_name": "mrpc",
"eval_steps": 200,
"evaluation_strategy": "steps",
"gradient_accumulation_steps": 1,
"greater_is_better": true,
"learning_rate": 0.020109951371648067,
"load_best_model_at_end": true,
"max_source_length": 128,
"max_steps": 5000,
"metric_for_best_model": "average_metrics",
"model_name_or_path": "t5-base",
"output_dir": "outputs_search/bitfit.mrpc.t5-base/19489534",
"overwrite_output_dir": true,
"per_device_eval_batch_size": 16,
"per_device_train_batch_size": 16,
"predict_with_generate": true,
"push_to_hub": false,
"save_steps": 200,
"save_strategy": "steps",
"save_total_limit": 1,
"seed": 100,
"split_validation_test": true,
"task_name": "mrpc",
"test_dataset_config_name": [
"en"
],
"test_dataset_name": "mrpc",
"tokenizer_name": "t5-base",
"warmup_steps": 500
}

View File

@ -1,42 +0,0 @@
{
"batch_size": 32,
"dataset_config_name": [
"en"
],
"delta_type": "bitfit",
"do_eval": true,
"do_test": true,
"do_train": true,
"eval_dataset_config_name": [
"en"
],
"eval_dataset_name": "mrpc",
"eval_steps": 200,
"evaluation_strategy": "steps",
"gradient_accumulation_steps": 1,
"greater_is_better": true,
"learning_rate": 0.005159882530578781,
"load_best_model_at_end": true,
"max_source_length": 128,
"max_steps": 5000,
"metric_for_best_model": "average_metrics",
"model_name_or_path": "t5-base",
"output_dir": "outputs_search/bitfit.mrpc.t5-base/2281342",
"overwrite_output_dir": true,
"per_device_eval_batch_size": 32,
"per_device_train_batch_size": 32,
"predict_with_generate": true,
"push_to_hub": false,
"save_steps": 200,
"save_strategy": "steps",
"save_total_limit": 1,
"seed": 100,
"split_validation_test": true,
"task_name": "mrpc",
"test_dataset_config_name": [
"en"
],
"test_dataset_name": "mrpc",
"tokenizer_name": "t5-base",
"warmup_steps": 0
}

View File

@ -1,42 +0,0 @@
{
"batch_size": 64,
"dataset_config_name": [
"en"
],
"delta_type": "bitfit",
"do_eval": true,
"do_test": true,
"do_train": true,
"eval_dataset_config_name": [
"en"
],
"eval_dataset_name": "mrpc",
"eval_steps": 200,
"evaluation_strategy": "steps",
"gradient_accumulation_steps": 2,
"greater_is_better": true,
"learning_rate": 0.006869610954981632,
"load_best_model_at_end": true,
"max_source_length": 128,
"max_steps": 5000,
"metric_for_best_model": "average_metrics",
"model_name_or_path": "t5-base",
"output_dir": "outputs_search/bitfit.mrpc.t5-base/26349674",
"overwrite_output_dir": true,
"per_device_eval_batch_size": 64,
"per_device_train_batch_size": 64,
"predict_with_generate": true,
"push_to_hub": false,
"save_steps": 200,
"save_strategy": "steps",
"save_total_limit": 1,
"seed": 100,
"split_validation_test": true,
"task_name": "mrpc",
"test_dataset_config_name": [
"en"
],
"test_dataset_name": "mrpc",
"tokenizer_name": "t5-base",
"warmup_steps": 500
}

View File

@ -1,42 +0,0 @@
{
"batch_size": 32,
"dataset_config_name": [
"en"
],
"delta_type": "bitfit",
"do_eval": true,
"do_test": true,
"do_train": true,
"eval_dataset_config_name": [
"en"
],
"eval_dataset_name": "mrpc",
"eval_steps": 200,
"evaluation_strategy": "steps",
"gradient_accumulation_steps": 1,
"greater_is_better": true,
"learning_rate": 0.0002723799659564822,
"load_best_model_at_end": true,
"max_source_length": 128,
"max_steps": 5000,
"metric_for_best_model": "average_metrics",
"model_name_or_path": "t5-base",
"output_dir": "outputs_search/bitfit.mrpc.t5-base/28219263",
"overwrite_output_dir": true,
"per_device_eval_batch_size": 32,
"per_device_train_batch_size": 32,
"predict_with_generate": true,
"push_to_hub": false,
"save_steps": 200,
"save_strategy": "steps",
"save_total_limit": 1,
"seed": 100,
"split_validation_test": true,
"task_name": "mrpc",
"test_dataset_config_name": [
"en"
],
"test_dataset_name": "mrpc",
"tokenizer_name": "t5-base",
"warmup_steps": 0
}

View File

@ -1,42 +0,0 @@
{
"batch_size": 64,
"dataset_config_name": [
"en"
],
"delta_type": "bitfit",
"do_eval": true,
"do_test": true,
"do_train": true,
"eval_dataset_config_name": [
"en"
],
"eval_dataset_name": "mrpc",
"eval_steps": 200,
"evaluation_strategy": "steps",
"gradient_accumulation_steps": 1,
"greater_is_better": true,
"learning_rate": 0.0018605158382269157,
"load_best_model_at_end": true,
"max_source_length": 128,
"max_steps": 5000,
"metric_for_best_model": "average_metrics",
"model_name_or_path": "t5-base",
"output_dir": "outputs_search/bitfit.mrpc.t5-base/28244173",
"overwrite_output_dir": true,
"per_device_eval_batch_size": 64,
"per_device_train_batch_size": 64,
"predict_with_generate": true,
"push_to_hub": false,
"save_steps": 200,
"save_strategy": "steps",
"save_total_limit": 1,
"seed": 100,
"split_validation_test": true,
"task_name": "mrpc",
"test_dataset_config_name": [
"en"
],
"test_dataset_name": "mrpc",
"tokenizer_name": "t5-base",
"warmup_steps": 0
}

View File

@ -1,42 +0,0 @@
{
"batch_size": 64,
"dataset_config_name": [
"en"
],
"delta_type": "bitfit",
"do_eval": true,
"do_test": true,
"do_train": true,
"eval_dataset_config_name": [
"en"
],
"eval_dataset_name": "mrpc",
"eval_steps": 200,
"evaluation_strategy": "steps",
"gradient_accumulation_steps": 2,
"greater_is_better": true,
"learning_rate": 0.0001248231069039661,
"load_best_model_at_end": true,
"max_source_length": 128,
"max_steps": 5000,
"metric_for_best_model": "average_metrics",
"model_name_or_path": "t5-base",
"output_dir": "outputs_search/bitfit.mrpc.t5-base/28313708",
"overwrite_output_dir": true,
"per_device_eval_batch_size": 64,
"per_device_train_batch_size": 64,
"predict_with_generate": true,
"push_to_hub": false,
"save_steps": 200,
"save_strategy": "steps",
"save_total_limit": 1,
"seed": 100,
"split_validation_test": true,
"task_name": "mrpc",
"test_dataset_config_name": [
"en"
],
"test_dataset_name": "mrpc",
"tokenizer_name": "t5-base",
"warmup_steps": 500
}

View File

@ -1,42 +0,0 @@
{
"batch_size": 64,
"dataset_config_name": [
"en"
],
"delta_type": "bitfit",
"do_eval": true,
"do_test": true,
"do_train": true,
"eval_dataset_config_name": [
"en"
],
"eval_dataset_name": "mrpc",
"eval_steps": 200,
"evaluation_strategy": "steps",
"gradient_accumulation_steps": 2,
"greater_is_better": true,
"learning_rate": 0.0009490000624893097,
"load_best_model_at_end": true,
"max_source_length": 128,
"max_steps": 5000,
"metric_for_best_model": "average_metrics",
"model_name_or_path": "t5-base",
"output_dir": "outputs_search/bitfit.mrpc.t5-base/28844651",
"overwrite_output_dir": true,
"per_device_eval_batch_size": 64,
"per_device_train_batch_size": 64,
"predict_with_generate": true,
"push_to_hub": false,
"save_steps": 200,
"save_strategy": "steps",
"save_total_limit": 1,
"seed": 100,
"split_validation_test": true,
"task_name": "mrpc",
"test_dataset_config_name": [
"en"
],
"test_dataset_name": "mrpc",
"tokenizer_name": "t5-base",
"warmup_steps": 500
}

View File

@ -1,42 +0,0 @@
{
"batch_size": 64,
"dataset_config_name": [
"en"
],
"delta_type": "bitfit",
"do_eval": true,
"do_test": true,
"do_train": true,
"eval_dataset_config_name": [
"en"
],
"eval_dataset_name": "mrpc",
"eval_steps": 200,
"evaluation_strategy": "steps",
"gradient_accumulation_steps": 1,
"greater_is_better": true,
"learning_rate": 3.5602209401278214e-05,
"load_best_model_at_end": true,
"max_source_length": 128,
"max_steps": 5000,
"metric_for_best_model": "average_metrics",
"model_name_or_path": "t5-base",
"output_dir": "outputs_search/bitfit.mrpc.t5-base/28881946",
"overwrite_output_dir": true,
"per_device_eval_batch_size": 64,
"per_device_train_batch_size": 64,
"predict_with_generate": true,
"push_to_hub": false,
"save_steps": 200,
"save_strategy": "steps",
"save_total_limit": 1,
"seed": 100,
"split_validation_test": true,
"task_name": "mrpc",
"test_dataset_config_name": [
"en"
],
"test_dataset_name": "mrpc",
"tokenizer_name": "t5-base",
"warmup_steps": 0
}

View File

@ -1,42 +0,0 @@
{
"batch_size": 32,
"dataset_config_name": [
"en"
],
"delta_type": "bitfit",
"do_eval": true,
"do_test": true,
"do_train": true,
"eval_dataset_config_name": [
"en"
],
"eval_dataset_name": "mrpc",
"eval_steps": 200,
"evaluation_strategy": "steps",
"gradient_accumulation_steps": 1,
"greater_is_better": true,
"learning_rate": 0.004220683008677483,
"load_best_model_at_end": true,
"max_source_length": 128,
"max_steps": 5000,
"metric_for_best_model": "average_metrics",
"model_name_or_path": "t5-base",
"output_dir": "outputs_search/bitfit.mrpc.t5-base/29695566",
"overwrite_output_dir": true,
"per_device_eval_batch_size": 32,
"per_device_train_batch_size": 32,
"predict_with_generate": true,
"push_to_hub": false,
"save_steps": 200,
"save_strategy": "steps",
"save_total_limit": 1,
"seed": 100,
"split_validation_test": true,
"task_name": "mrpc",
"test_dataset_config_name": [
"en"
],
"test_dataset_name": "mrpc",
"tokenizer_name": "t5-base",
"warmup_steps": 0
}

View File

@ -1,42 +0,0 @@
{
"batch_size": 32,
"dataset_config_name": [
"en"
],
"delta_type": "bitfit",
"do_eval": true,
"do_test": true,
"do_train": true,
"eval_dataset_config_name": [
"en"
],
"eval_dataset_name": "mrpc",
"eval_steps": 200,
"evaluation_strategy": "steps",
"gradient_accumulation_steps": 1,
"greater_is_better": true,
"learning_rate": 0.004159184883370181,
"load_best_model_at_end": true,
"max_source_length": 128,
"max_steps": 5000,
"metric_for_best_model": "average_metrics",
"model_name_or_path": "t5-base",
"output_dir": "outputs_search/bitfit.mrpc.t5-base/304080",
"overwrite_output_dir": true,
"per_device_eval_batch_size": 32,
"per_device_train_batch_size": 32,
"predict_with_generate": true,
"push_to_hub": false,
"save_steps": 200,
"save_strategy": "steps",
"save_total_limit": 1,
"seed": 100,
"split_validation_test": true,
"task_name": "mrpc",
"test_dataset_config_name": [
"en"
],
"test_dataset_name": "mrpc",
"tokenizer_name": "t5-base",
"warmup_steps": 0
}

View File

@ -1,42 +0,0 @@
{
"batch_size": 64,
"dataset_config_name": [
"en"
],
"delta_type": "bitfit",
"do_eval": true,
"do_test": true,
"do_train": true,
"eval_dataset_config_name": [
"en"
],
"eval_dataset_name": "mrpc",
"eval_steps": 200,
"evaluation_strategy": "steps",
"gradient_accumulation_steps": 2,
"greater_is_better": true,
"learning_rate": 0.0009353172054773991,
"load_best_model_at_end": true,
"max_source_length": 128,
"max_steps": 5000,
"metric_for_best_model": "average_metrics",
"model_name_or_path": "t5-base",
"output_dir": "outputs_search/bitfit.mrpc.t5-base/33594301",
"overwrite_output_dir": true,
"per_device_eval_batch_size": 64,
"per_device_train_batch_size": 64,
"predict_with_generate": true,
"push_to_hub": false,
"save_steps": 200,
"save_strategy": "steps",
"save_total_limit": 1,
"seed": 100,
"split_validation_test": true,
"task_name": "mrpc",
"test_dataset_config_name": [
"en"
],
"test_dataset_name": "mrpc",
"tokenizer_name": "t5-base",
"warmup_steps": 0
}

View File

@ -1,42 +0,0 @@
{
"batch_size": 32,
"dataset_config_name": [
"en"
],
"delta_type": "bitfit",
"do_eval": true,
"do_test": true,
"do_train": true,
"eval_dataset_config_name": [
"en"
],
"eval_dataset_name": "mrpc",
"eval_steps": 200,
"evaluation_strategy": "steps",
"gradient_accumulation_steps": 1,
"greater_is_better": true,
"learning_rate": 0.0037650265946582574,
"load_best_model_at_end": true,
"max_source_length": 128,
"max_steps": 5000,
"metric_for_best_model": "average_metrics",
"model_name_or_path": "t5-base",
"output_dir": "outputs_search/bitfit.mrpc.t5-base/37208828",
"overwrite_output_dir": true,
"per_device_eval_batch_size": 32,
"per_device_train_batch_size": 32,
"predict_with_generate": true,
"push_to_hub": false,
"save_steps": 200,
"save_strategy": "steps",
"save_total_limit": 1,
"seed": 100,
"split_validation_test": true,
"task_name": "mrpc",
"test_dataset_config_name": [
"en"
],
"test_dataset_name": "mrpc",
"tokenizer_name": "t5-base",
"warmup_steps": 0
}

View File

@ -1,42 +0,0 @@
{
"batch_size": 16,
"dataset_config_name": [
"en"
],
"delta_type": "bitfit",
"do_eval": true,
"do_test": true,
"do_train": true,
"eval_dataset_config_name": [
"en"
],
"eval_dataset_name": "mrpc",
"eval_steps": 200,
"evaluation_strategy": "steps",
"gradient_accumulation_steps": 1,
"greater_is_better": true,
"learning_rate": 6.867655291394631e-05,
"load_best_model_at_end": true,
"max_source_length": 128,
"max_steps": 5000,
"metric_for_best_model": "average_metrics",
"model_name_or_path": "t5-base",
"output_dir": "outputs_search/bitfit.mrpc.t5-base/38351436",
"overwrite_output_dir": true,
"per_device_eval_batch_size": 16,
"per_device_train_batch_size": 16,
"predict_with_generate": true,
"push_to_hub": false,
"save_steps": 200,
"save_strategy": "steps",
"save_total_limit": 1,
"seed": 100,
"split_validation_test": true,
"task_name": "mrpc",
"test_dataset_config_name": [
"en"
],
"test_dataset_name": "mrpc",
"tokenizer_name": "t5-base",
"warmup_steps": 500
}

View File

@ -1,42 +0,0 @@
{
"batch_size": 64,
"dataset_config_name": [
"en"
],
"delta_type": "bitfit",
"do_eval": true,
"do_test": true,
"do_train": true,
"eval_dataset_config_name": [
"en"
],
"eval_dataset_name": "mrpc",
"eval_steps": 200,
"evaluation_strategy": "steps",
"gradient_accumulation_steps": 2,
"greater_is_better": true,
"learning_rate": 0.0022951686429675895,
"load_best_model_at_end": true,
"max_source_length": 128,
"max_steps": 5000,
"metric_for_best_model": "average_metrics",
"model_name_or_path": "t5-base",
"output_dir": "outputs_search/bitfit.mrpc.t5-base/42338278",
"overwrite_output_dir": true,
"per_device_eval_batch_size": 64,
"per_device_train_batch_size": 64,
"predict_with_generate": true,
"push_to_hub": false,
"save_steps": 200,
"save_strategy": "steps",
"save_total_limit": 1,
"seed": 100,
"split_validation_test": true,
"task_name": "mrpc",
"test_dataset_config_name": [
"en"
],
"test_dataset_name": "mrpc",
"tokenizer_name": "t5-base",
"warmup_steps": 500
}

View File

@ -1,42 +0,0 @@
{
"batch_size": 64,
"dataset_config_name": [
"en"
],
"delta_type": "bitfit",
"do_eval": true,
"do_test": true,
"do_train": true,
"eval_dataset_config_name": [
"en"
],
"eval_dataset_name": "mrpc",
"eval_steps": 200,
"evaluation_strategy": "steps",
"gradient_accumulation_steps": 2,
"greater_is_better": true,
"learning_rate": 0.0011474682877585407,
"load_best_model_at_end": true,
"max_source_length": 128,
"max_steps": 5000,
"metric_for_best_model": "average_metrics",
"model_name_or_path": "t5-base",
"output_dir": "outputs_search/bitfit.mrpc.t5-base/43419391",
"overwrite_output_dir": true,
"per_device_eval_batch_size": 64,
"per_device_train_batch_size": 64,
"predict_with_generate": true,
"push_to_hub": false,
"save_steps": 200,
"save_strategy": "steps",
"save_total_limit": 1,
"seed": 100,
"split_validation_test": true,
"task_name": "mrpc",
"test_dataset_config_name": [
"en"
],
"test_dataset_name": "mrpc",
"tokenizer_name": "t5-base",
"warmup_steps": 500
}

View File

@ -1,42 +0,0 @@
{
"batch_size": 64,
"dataset_config_name": [
"en"
],
"delta_type": "bitfit",
"do_eval": true,
"do_test": true,
"do_train": true,
"eval_dataset_config_name": [
"en"
],
"eval_dataset_name": "mrpc",
"eval_steps": 200,
"evaluation_strategy": "steps",
"gradient_accumulation_steps": 1,
"greater_is_better": true,
"learning_rate": 0.009965694572181888,
"load_best_model_at_end": true,
"max_source_length": 128,
"max_steps": 5000,
"metric_for_best_model": "average_metrics",
"model_name_or_path": "t5-base",
"output_dir": "outputs_search/bitfit.mrpc.t5-base/45030088",
"overwrite_output_dir": true,
"per_device_eval_batch_size": 64,
"per_device_train_batch_size": 64,
"predict_with_generate": true,
"push_to_hub": false,
"save_steps": 200,
"save_strategy": "steps",
"save_total_limit": 1,
"seed": 100,
"split_validation_test": true,
"task_name": "mrpc",
"test_dataset_config_name": [
"en"
],
"test_dataset_name": "mrpc",
"tokenizer_name": "t5-base",
"warmup_steps": 500
}

View File

@ -1,42 +0,0 @@
{
"batch_size": 64,
"dataset_config_name": [
"en"
],
"delta_type": "bitfit",
"do_eval": true,
"do_test": true,
"do_train": true,
"eval_dataset_config_name": [
"en"
],
"eval_dataset_name": "mrpc",
"eval_steps": 200,
"evaluation_strategy": "steps",
"gradient_accumulation_steps": 1,
"greater_is_better": true,
"learning_rate": 0.0020236592832077785,
"load_best_model_at_end": true,
"max_source_length": 128,
"max_steps": 5000,
"metric_for_best_model": "average_metrics",
"model_name_or_path": "t5-base",
"output_dir": "outputs_search/bitfit.mrpc.t5-base/50851153",
"overwrite_output_dir": true,
"per_device_eval_batch_size": 64,
"per_device_train_batch_size": 64,
"predict_with_generate": true,
"push_to_hub": false,
"save_steps": 200,
"save_strategy": "steps",
"save_total_limit": 1,
"seed": 100,
"split_validation_test": true,
"task_name": "mrpc",
"test_dataset_config_name": [
"en"
],
"test_dataset_name": "mrpc",
"tokenizer_name": "t5-base",
"warmup_steps": 0
}

View File

@ -1 +0,0 @@
{"batch_size": 64, "dataset_config_name": ["en"], "delta_type": "bitfit", "do_eval": true, "do_test": true, "do_train": true, "eval_dataset_config_name": ["en"], "eval_dataset_name": "mrpc", "eval_steps": 200, "evaluation_strategy": "steps", "gradient_accumulation_steps": 1, "greater_is_better": true, "learning_rate": 0.0020236592832077785, "load_best_model_at_end": true, "max_source_length": 128, "max_steps": 5000, "metric_for_best_model": "average_metrics", "model_name_or_path": "t5-base", "output_dir": "outputs_search/bitfit.mrpc.t5-base/50851153", "overwrite_output_dir": true, "per_device_eval_batch_size": 64, "per_device_train_batch_size": 64, "predict_with_generate": true, "push_to_hub": false, "save_steps": 200, "save_strategy": "steps", "save_total_limit": 1, "seed": 100, "split_validation_test": true, "task_name": "mrpc", "test_dataset_config_name": ["en"], "test_dataset_name": "mrpc", "tokenizer_name": "t5-base", "warmup_steps": 0}

View File

@ -1,42 +0,0 @@
{
"batch_size": 64,
"dataset_config_name": [
"en"
],
"delta_type": "bitfit",
"do_eval": true,
"do_test": true,
"do_train": true,
"eval_dataset_config_name": [
"en"
],
"eval_dataset_name": "mrpc",
"eval_steps": 200,
"evaluation_strategy": "steps",
"gradient_accumulation_steps": 1,
"greater_is_better": true,
"learning_rate": 0.011098597581779427,
"load_best_model_at_end": true,
"max_source_length": 128,
"max_steps": 5000,
"metric_for_best_model": "average_metrics",
"model_name_or_path": "t5-base",
"output_dir": "outputs_search/bitfit.mrpc.t5-base/57783553",
"overwrite_output_dir": true,
"per_device_eval_batch_size": 64,
"per_device_train_batch_size": 64,
"predict_with_generate": true,
"push_to_hub": false,
"save_steps": 200,
"save_strategy": "steps",
"save_total_limit": 1,
"seed": 100,
"split_validation_test": true,
"task_name": "mrpc",
"test_dataset_config_name": [
"en"
],
"test_dataset_name": "mrpc",
"tokenizer_name": "t5-base",
"warmup_steps": 0
}

View File

@ -1,42 +0,0 @@
{
"batch_size": 64,
"dataset_config_name": [
"en"
],
"delta_type": "bitfit",
"do_eval": true,
"do_test": true,
"do_train": true,
"eval_dataset_config_name": [
"en"
],
"eval_dataset_name": "mrpc",
"eval_steps": 200,
"evaluation_strategy": "steps",
"gradient_accumulation_steps": 2,
"greater_is_better": true,
"learning_rate": 0.0005414844782319124,
"load_best_model_at_end": true,
"max_source_length": 128,
"max_steps": 5000,
"metric_for_best_model": "average_metrics",
"model_name_or_path": "t5-base",
"output_dir": "outputs_search/bitfit.mrpc.t5-base/6060488",
"overwrite_output_dir": true,
"per_device_eval_batch_size": 64,
"per_device_train_batch_size": 64,
"predict_with_generate": true,
"push_to_hub": false,
"save_steps": 200,
"save_strategy": "steps",
"save_total_limit": 1,
"seed": 100,
"split_validation_test": true,
"task_name": "mrpc",
"test_dataset_config_name": [
"en"
],
"test_dataset_name": "mrpc",
"tokenizer_name": "t5-base",
"warmup_steps": 500
}

View File

@ -1,42 +0,0 @@
{
"batch_size": 64,
"dataset_config_name": [
"en"
],
"delta_type": "bitfit",
"do_eval": true,
"do_test": true,
"do_train": true,
"eval_dataset_config_name": [
"en"
],
"eval_dataset_name": "mrpc",
"eval_steps": 200,
"evaluation_strategy": "steps",
"gradient_accumulation_steps": 2,
"greater_is_better": true,
"learning_rate": 0.016927560240899083,
"load_best_model_at_end": true,
"max_source_length": 128,
"max_steps": 5000,
"metric_for_best_model": "average_metrics",
"model_name_or_path": "t5-base",
"output_dir": "outputs_search/bitfit.mrpc.t5-base/61860753",
"overwrite_output_dir": true,
"per_device_eval_batch_size": 64,
"per_device_train_batch_size": 64,
"predict_with_generate": true,
"push_to_hub": false,
"save_steps": 200,
"save_strategy": "steps",
"save_total_limit": 1,
"seed": 100,
"split_validation_test": true,
"task_name": "mrpc",
"test_dataset_config_name": [
"en"
],
"test_dataset_name": "mrpc",
"tokenizer_name": "t5-base",
"warmup_steps": 500
}

View File

@ -1,42 +0,0 @@
{
"batch_size": 64,
"dataset_config_name": [
"en"
],
"delta_type": "bitfit",
"do_eval": true,
"do_test": true,
"do_train": true,
"eval_dataset_config_name": [
"en"
],
"eval_dataset_name": "mrpc",
"eval_steps": 200,
"evaluation_strategy": "steps",
"gradient_accumulation_steps": 2,
"greater_is_better": true,
"learning_rate": 1.0141082015912518e-05,
"load_best_model_at_end": true,
"max_source_length": 128,
"max_steps": 5000,
"metric_for_best_model": "average_metrics",
"model_name_or_path": "t5-base",
"output_dir": "outputs_search/bitfit.mrpc.t5-base/63232091",
"overwrite_output_dir": true,
"per_device_eval_batch_size": 64,
"per_device_train_batch_size": 64,
"predict_with_generate": true,
"push_to_hub": false,
"save_steps": 200,
"save_strategy": "steps",
"save_total_limit": 1,
"seed": 100,
"split_validation_test": true,
"task_name": "mrpc",
"test_dataset_config_name": [
"en"
],
"test_dataset_name": "mrpc",
"tokenizer_name": "t5-base",
"warmup_steps": 0
}

View File

@ -1,42 +0,0 @@
{
"batch_size": 64,
"dataset_config_name": [
"en"
],
"delta_type": "bitfit",
"do_eval": true,
"do_test": true,
"do_train": true,
"eval_dataset_config_name": [
"en"
],
"eval_dataset_name": "mrpc",
"eval_steps": 200,
"evaluation_strategy": "steps",
"gradient_accumulation_steps": 2,
"greater_is_better": true,
"learning_rate": 0.0018137027382556477,
"load_best_model_at_end": true,
"max_source_length": 128,
"max_steps": 5000,
"metric_for_best_model": "average_metrics",
"model_name_or_path": "t5-base",
"output_dir": "outputs_search/bitfit.mrpc.t5-base/6329472",
"overwrite_output_dir": true,
"per_device_eval_batch_size": 64,
"per_device_train_batch_size": 64,
"predict_with_generate": true,
"push_to_hub": false,
"save_steps": 200,
"save_strategy": "steps",
"save_total_limit": 1,
"seed": 100,
"split_validation_test": true,
"task_name": "mrpc",
"test_dataset_config_name": [
"en"
],
"test_dataset_name": "mrpc",
"tokenizer_name": "t5-base",
"warmup_steps": 0
}

View File

@ -1,42 +0,0 @@
{
"batch_size": 64,
"dataset_config_name": [
"en"
],
"delta_type": "bitfit",
"do_eval": true,
"do_test": true,
"do_train": true,
"eval_dataset_config_name": [
"en"
],
"eval_dataset_name": "mrpc",
"eval_steps": 200,
"evaluation_strategy": "steps",
"gradient_accumulation_steps": 1,
"greater_is_better": true,
"learning_rate": 0.023938918670661075,
"load_best_model_at_end": true,
"max_source_length": 128,
"max_steps": 5000,
"metric_for_best_model": "average_metrics",
"model_name_or_path": "t5-base",
"output_dir": "outputs_search/bitfit.mrpc.t5-base/64753972",
"overwrite_output_dir": true,
"per_device_eval_batch_size": 64,
"per_device_train_batch_size": 64,
"predict_with_generate": true,
"push_to_hub": false,
"save_steps": 200,
"save_strategy": "steps",
"save_total_limit": 1,
"seed": 100,
"split_validation_test": true,
"task_name": "mrpc",
"test_dataset_config_name": [
"en"
],
"test_dataset_name": "mrpc",
"tokenizer_name": "t5-base",
"warmup_steps": 500
}

View File

@ -1,42 +0,0 @@
{
"batch_size": 16,
"dataset_config_name": [
"en"
],
"delta_type": "bitfit",
"do_eval": true,
"do_test": true,
"do_train": true,
"eval_dataset_config_name": [
"en"
],
"eval_dataset_name": "mrpc",
"eval_steps": 200,
"evaluation_strategy": "steps",
"gradient_accumulation_steps": 1,
"greater_is_better": true,
"learning_rate": 0.08212873599011565,
"load_best_model_at_end": true,
"max_source_length": 128,
"max_steps": 5000,
"metric_for_best_model": "average_metrics",
"model_name_or_path": "t5-base",
"output_dir": "outputs_search/bitfit.mrpc.t5-base/65221118",
"overwrite_output_dir": true,
"per_device_eval_batch_size": 16,
"per_device_train_batch_size": 16,
"predict_with_generate": true,
"push_to_hub": false,
"save_steps": 200,
"save_strategy": "steps",
"save_total_limit": 1,
"seed": 100,
"split_validation_test": true,
"task_name": "mrpc",
"test_dataset_config_name": [
"en"
],
"test_dataset_name": "mrpc",
"tokenizer_name": "t5-base",
"warmup_steps": 500
}

View File

@ -1,42 +0,0 @@
{
"batch_size": 64,
"dataset_config_name": [
"en"
],
"delta_type": "bitfit",
"do_eval": true,
"do_test": true,
"do_train": true,
"eval_dataset_config_name": [
"en"
],
"eval_dataset_name": "mrpc",
"eval_steps": 200,
"evaluation_strategy": "steps",
"gradient_accumulation_steps": 1,
"greater_is_better": true,
"learning_rate": 4.8538530604501934e-05,
"load_best_model_at_end": true,
"max_source_length": 128,
"max_steps": 5000,
"metric_for_best_model": "average_metrics",
"model_name_or_path": "t5-base",
"output_dir": "outputs_search/bitfit.mrpc.t5-base/66798551",
"overwrite_output_dir": true,
"per_device_eval_batch_size": 64,
"per_device_train_batch_size": 64,
"predict_with_generate": true,
"push_to_hub": false,
"save_steps": 200,
"save_strategy": "steps",
"save_total_limit": 1,
"seed": 100,
"split_validation_test": true,
"task_name": "mrpc",
"test_dataset_config_name": [
"en"
],
"test_dataset_name": "mrpc",
"tokenizer_name": "t5-base",
"warmup_steps": 0
}

View File

@ -1,42 +0,0 @@
{
"batch_size": 32,
"dataset_config_name": [
"en"
],
"delta_type": "bitfit",
"do_eval": true,
"do_test": true,
"do_train": true,
"eval_dataset_config_name": [
"en"
],
"eval_dataset_name": "mrpc",
"eval_steps": 200,
"evaluation_strategy": "steps",
"gradient_accumulation_steps": 1,
"greater_is_better": true,
"learning_rate": 0.0056649657801790786,
"load_best_model_at_end": true,
"max_source_length": 128,
"max_steps": 5000,
"metric_for_best_model": "average_metrics",
"model_name_or_path": "t5-base",
"output_dir": "outputs_search/bitfit.mrpc.t5-base/67615376",
"overwrite_output_dir": true,
"per_device_eval_batch_size": 32,
"per_device_train_batch_size": 32,
"predict_with_generate": true,
"push_to_hub": false,
"save_steps": 200,
"save_strategy": "steps",
"save_total_limit": 1,
"seed": 100,
"split_validation_test": true,
"task_name": "mrpc",
"test_dataset_config_name": [
"en"
],
"test_dataset_name": "mrpc",
"tokenizer_name": "t5-base",
"warmup_steps": 0
}

View File

@ -1,42 +0,0 @@
{
"batch_size": 64,
"dataset_config_name": [
"en"
],
"delta_type": "bitfit",
"do_eval": true,
"do_test": true,
"do_train": true,
"eval_dataset_config_name": [
"en"
],
"eval_dataset_name": "mrpc",
"eval_steps": 200,
"evaluation_strategy": "steps",
"gradient_accumulation_steps": 1,
"greater_is_better": true,
"learning_rate": 0.03495857107255486,
"load_best_model_at_end": true,
"max_source_length": 128,
"max_steps": 5000,
"metric_for_best_model": "average_metrics",
"model_name_or_path": "t5-base",
"output_dir": "outputs_search/bitfit.mrpc.t5-base/6773136",
"overwrite_output_dir": true,
"per_device_eval_batch_size": 64,
"per_device_train_batch_size": 64,
"predict_with_generate": true,
"push_to_hub": false,
"save_steps": 200,
"save_strategy": "steps",
"save_total_limit": 1,
"seed": 100,
"split_validation_test": true,
"task_name": "mrpc",
"test_dataset_config_name": [
"en"
],
"test_dataset_name": "mrpc",
"tokenizer_name": "t5-base",
"warmup_steps": 500
}

View File

@ -1,42 +0,0 @@
{
"batch_size": 64,
"dataset_config_name": [
"en"
],
"delta_type": "bitfit",
"do_eval": true,
"do_test": true,
"do_train": true,
"eval_dataset_config_name": [
"en"
],
"eval_dataset_name": "mrpc",
"eval_steps": 200,
"evaluation_strategy": "steps",
"gradient_accumulation_steps": 2,
"greater_is_better": true,
"learning_rate": 0.00039059864620439417,
"load_best_model_at_end": true,
"max_source_length": 128,
"max_steps": 5000,
"metric_for_best_model": "average_metrics",
"model_name_or_path": "t5-base",
"output_dir": "outputs_search/bitfit.mrpc.t5-base/68027569",
"overwrite_output_dir": true,
"per_device_eval_batch_size": 64,
"per_device_train_batch_size": 64,
"predict_with_generate": true,
"push_to_hub": false,
"save_steps": 200,
"save_strategy": "steps",
"save_total_limit": 1,
"seed": 100,
"split_validation_test": true,
"task_name": "mrpc",
"test_dataset_config_name": [
"en"
],
"test_dataset_name": "mrpc",
"tokenizer_name": "t5-base",
"warmup_steps": 500
}

View File

@ -1,42 +0,0 @@
{
"batch_size": 64,
"dataset_config_name": [
"en"
],
"delta_type": "bitfit",
"do_eval": true,
"do_test": true,
"do_train": true,
"eval_dataset_config_name": [
"en"
],
"eval_dataset_name": "mrpc",
"eval_steps": 200,
"evaluation_strategy": "steps",
"gradient_accumulation_steps": 2,
"greater_is_better": true,
"learning_rate": 0.0002642938525995798,
"load_best_model_at_end": true,
"max_source_length": 128,
"max_steps": 5000,
"metric_for_best_model": "average_metrics",
"model_name_or_path": "t5-base",
"output_dir": "outputs_search/bitfit.mrpc.t5-base/68314189",
"overwrite_output_dir": true,
"per_device_eval_batch_size": 64,
"per_device_train_batch_size": 64,
"predict_with_generate": true,
"push_to_hub": false,
"save_steps": 200,
"save_strategy": "steps",
"save_total_limit": 1,
"seed": 100,
"split_validation_test": true,
"task_name": "mrpc",
"test_dataset_config_name": [
"en"
],
"test_dataset_name": "mrpc",
"tokenizer_name": "t5-base",
"warmup_steps": 500
}

View File

@ -1,42 +0,0 @@
{
"batch_size": 64,
"dataset_config_name": [
"en"
],
"delta_type": "bitfit",
"do_eval": true,
"do_test": true,
"do_train": true,
"eval_dataset_config_name": [
"en"
],
"eval_dataset_name": "mrpc",
"eval_steps": 200,
"evaluation_strategy": "steps",
"gradient_accumulation_steps": 1,
"greater_is_better": true,
"learning_rate": 0.037536374095955345,
"load_best_model_at_end": true,
"max_source_length": 128,
"max_steps": 5000,
"metric_for_best_model": "average_metrics",
"model_name_or_path": "t5-base",
"output_dir": "outputs_search/bitfit.mrpc.t5-base/71501650",
"overwrite_output_dir": true,
"per_device_eval_batch_size": 64,
"per_device_train_batch_size": 64,
"predict_with_generate": true,
"push_to_hub": false,
"save_steps": 200,
"save_strategy": "steps",
"save_total_limit": 1,
"seed": 100,
"split_validation_test": true,
"task_name": "mrpc",
"test_dataset_config_name": [
"en"
],
"test_dataset_name": "mrpc",
"tokenizer_name": "t5-base",
"warmup_steps": 0
}

View File

@ -1,42 +0,0 @@
{
"batch_size": 32,
"dataset_config_name": [
"en"
],
"delta_type": "bitfit",
"do_eval": true,
"do_test": true,
"do_train": true,
"eval_dataset_config_name": [
"en"
],
"eval_dataset_name": "mrpc",
"eval_steps": 200,
"evaluation_strategy": "steps",
"gradient_accumulation_steps": 1,
"greater_is_better": true,
"learning_rate": 0.008866400032296955,
"load_best_model_at_end": true,
"max_source_length": 128,
"max_steps": 5000,
"metric_for_best_model": "average_metrics",
"model_name_or_path": "t5-base",
"output_dir": "outputs_search/bitfit.mrpc.t5-base/73962149",
"overwrite_output_dir": true,
"per_device_eval_batch_size": 32,
"per_device_train_batch_size": 32,
"predict_with_generate": true,
"push_to_hub": false,
"save_steps": 200,
"save_strategy": "steps",
"save_total_limit": 1,
"seed": 100,
"split_validation_test": true,
"task_name": "mrpc",
"test_dataset_config_name": [
"en"
],
"test_dataset_name": "mrpc",
"tokenizer_name": "t5-base",
"warmup_steps": 0
}

View File

@ -1,42 +0,0 @@
{
"batch_size": 64,
"dataset_config_name": [
"en"
],
"delta_type": "bitfit",
"do_eval": true,
"do_test": true,
"do_train": true,
"eval_dataset_config_name": [
"en"
],
"eval_dataset_name": "mrpc",
"eval_steps": 200,
"evaluation_strategy": "steps",
"gradient_accumulation_steps": 1,
"greater_is_better": true,
"learning_rate": 0.01086484610816823,
"load_best_model_at_end": true,
"max_source_length": 128,
"max_steps": 5000,
"metric_for_best_model": "average_metrics",
"model_name_or_path": "t5-base",
"output_dir": "outputs_search/bitfit.mrpc.t5-base/83260414",
"overwrite_output_dir": true,
"per_device_eval_batch_size": 64,
"per_device_train_batch_size": 64,
"predict_with_generate": true,
"push_to_hub": false,
"save_steps": 200,
"save_strategy": "steps",
"save_total_limit": 1,
"seed": 100,
"split_validation_test": true,
"task_name": "mrpc",
"test_dataset_config_name": [
"en"
],
"test_dataset_name": "mrpc",
"tokenizer_name": "t5-base",
"warmup_steps": 500
}

View File

@ -1,42 +0,0 @@
{
"batch_size": 64,
"dataset_config_name": [
"en"
],
"delta_type": "bitfit",
"do_eval": true,
"do_test": true,
"do_train": true,
"eval_dataset_config_name": [
"en"
],
"eval_dataset_name": "mrpc",
"eval_steps": 200,
"evaluation_strategy": "steps",
"gradient_accumulation_steps": 2,
"greater_is_better": true,
"learning_rate": 1.2611496517588744e-05,
"load_best_model_at_end": true,
"max_source_length": 128,
"max_steps": 5000,
"metric_for_best_model": "average_metrics",
"model_name_or_path": "t5-base",
"output_dir": "outputs_search/bitfit.mrpc.t5-base/83839551",
"overwrite_output_dir": true,
"per_device_eval_batch_size": 64,
"per_device_train_batch_size": 64,
"predict_with_generate": true,
"push_to_hub": false,
"save_steps": 200,
"save_strategy": "steps",
"save_total_limit": 1,
"seed": 100,
"split_validation_test": true,
"task_name": "mrpc",
"test_dataset_config_name": [
"en"
],
"test_dataset_name": "mrpc",
"tokenizer_name": "t5-base",
"warmup_steps": 0
}

View File

@ -1,42 +0,0 @@
{
"batch_size": 32,
"dataset_config_name": [
"en"
],
"delta_type": "bitfit",
"do_eval": true,
"do_test": true,
"do_train": true,
"eval_dataset_config_name": [
"en"
],
"eval_dataset_name": "mrpc",
"eval_steps": 200,
"evaluation_strategy": "steps",
"gradient_accumulation_steps": 1,
"greater_is_better": true,
"learning_rate": 0.0010110776655071255,
"load_best_model_at_end": true,
"max_source_length": 128,
"max_steps": 5000,
"metric_for_best_model": "average_metrics",
"model_name_or_path": "t5-base",
"output_dir": "outputs_search/bitfit.mrpc.t5-base/85624941",
"overwrite_output_dir": true,
"per_device_eval_batch_size": 32,
"per_device_train_batch_size": 32,
"predict_with_generate": true,
"push_to_hub": false,
"save_steps": 200,
"save_strategy": "steps",
"save_total_limit": 1,
"seed": 100,
"split_validation_test": true,
"task_name": "mrpc",
"test_dataset_config_name": [
"en"
],
"test_dataset_name": "mrpc",
"tokenizer_name": "t5-base",
"warmup_steps": 0
}

View File

@ -1,42 +0,0 @@
{
"batch_size": 32,
"dataset_config_name": [
"en"
],
"delta_type": "bitfit",
"do_eval": true,
"do_test": true,
"do_train": true,
"eval_dataset_config_name": [
"en"
],
"eval_dataset_name": "mrpc",
"eval_steps": 200,
"evaluation_strategy": "steps",
"gradient_accumulation_steps": 1,
"greater_is_better": true,
"learning_rate": 0.0005414844782319124,
"load_best_model_at_end": true,
"max_source_length": 128,
"max_steps": 5000,
"metric_for_best_model": "average_metrics",
"model_name_or_path": "t5-base",
"output_dir": "outputs_search/bitfit.mrpc.t5-base/86039549",
"overwrite_output_dir": true,
"per_device_eval_batch_size": 32,
"per_device_train_batch_size": 32,
"predict_with_generate": true,
"push_to_hub": false,
"save_steps": 200,
"save_strategy": "steps",
"save_total_limit": 1,
"seed": 100,
"split_validation_test": true,
"task_name": "mrpc",
"test_dataset_config_name": [
"en"
],
"test_dataset_name": "mrpc",
"tokenizer_name": "t5-base",
"warmup_steps": 0
}

View File

@ -1,42 +0,0 @@
{
"batch_size": 64,
"dataset_config_name": [
"en"
],
"delta_type": "bitfit",
"do_eval": true,
"do_test": true,
"do_train": true,
"eval_dataset_config_name": [
"en"
],
"eval_dataset_name": "mrpc",
"eval_steps": 200,
"evaluation_strategy": "steps",
"gradient_accumulation_steps": 1,
"greater_is_better": true,
"learning_rate": 0.0027955533792956614,
"load_best_model_at_end": true,
"max_source_length": 128,
"max_steps": 5000,
"metric_for_best_model": "average_metrics",
"model_name_or_path": "t5-base",
"output_dir": "outputs_search/bitfit.mrpc.t5-base/89676181",
"overwrite_output_dir": true,
"per_device_eval_batch_size": 64,
"per_device_train_batch_size": 64,
"predict_with_generate": true,
"push_to_hub": false,
"save_steps": 200,
"save_strategy": "steps",
"save_total_limit": 1,
"seed": 100,
"split_validation_test": true,
"task_name": "mrpc",
"test_dataset_config_name": [
"en"
],
"test_dataset_name": "mrpc",
"tokenizer_name": "t5-base",
"warmup_steps": 0
}

View File

@ -1,42 +0,0 @@
{
"batch_size": 64,
"dataset_config_name": [
"en"
],
"delta_type": "bitfit",
"do_eval": true,
"do_test": true,
"do_train": true,
"eval_dataset_config_name": [
"en"
],
"eval_dataset_name": "mrpc",
"eval_steps": 200,
"evaluation_strategy": "steps",
"gradient_accumulation_steps": 2,
"greater_is_better": true,
"learning_rate": 0.0012573200149141731,
"load_best_model_at_end": true,
"max_source_length": 128,
"max_steps": 5000,
"metric_for_best_model": "average_metrics",
"model_name_or_path": "t5-base",
"output_dir": "outputs_search/bitfit.mrpc.t5-base/91446644",
"overwrite_output_dir": true,
"per_device_eval_batch_size": 64,
"per_device_train_batch_size": 64,
"predict_with_generate": true,
"push_to_hub": false,
"save_steps": 200,
"save_strategy": "steps",
"save_total_limit": 1,
"seed": 100,
"split_validation_test": true,
"task_name": "mrpc",
"test_dataset_config_name": [
"en"
],
"test_dataset_name": "mrpc",
"tokenizer_name": "t5-base",
"warmup_steps": 500
}

View File

@ -1,42 +0,0 @@
{
"batch_size": 64,
"dataset_config_name": [
"en"
],
"delta_type": "bitfit",
"do_eval": true,
"do_test": true,
"do_train": true,
"eval_dataset_config_name": [
"en"
],
"eval_dataset_name": "mrpc",
"eval_steps": 200,
"evaluation_strategy": "steps",
"gradient_accumulation_steps": 2,
"greater_is_better": true,
"learning_rate": 0.001152480984285531,
"load_best_model_at_end": true,
"max_source_length": 128,
"max_steps": 5000,
"metric_for_best_model": "average_metrics",
"model_name_or_path": "t5-base",
"output_dir": "outputs_search/bitfit.mrpc.t5-base/92427532",
"overwrite_output_dir": true,
"per_device_eval_batch_size": 64,
"per_device_train_batch_size": 64,
"predict_with_generate": true,
"push_to_hub": false,
"save_steps": 200,
"save_strategy": "steps",
"save_total_limit": 1,
"seed": 100,
"split_validation_test": true,
"task_name": "mrpc",
"test_dataset_config_name": [
"en"
],
"test_dataset_name": "mrpc",
"tokenizer_name": "t5-base",
"warmup_steps": 0
}

View File

@ -1,42 +0,0 @@
{
"batch_size": 16,
"dataset_config_name": [
"en"
],
"delta_type": "bitfit",
"do_eval": true,
"do_test": true,
"do_train": true,
"eval_dataset_config_name": [
"en"
],
"eval_dataset_name": "mrpc",
"eval_steps": 200,
"evaluation_strategy": "steps",
"gradient_accumulation_steps": 1,
"greater_is_better": true,
"learning_rate": 0.002464124578330328,
"load_best_model_at_end": true,
"max_source_length": 128,
"max_steps": 5000,
"metric_for_best_model": "average_metrics",
"model_name_or_path": "t5-base",
"output_dir": "outputs_search/bitfit.mrpc.t5-base/93923515",
"overwrite_output_dir": true,
"per_device_eval_batch_size": 16,
"per_device_train_batch_size": 16,
"predict_with_generate": true,
"push_to_hub": false,
"save_steps": 200,
"save_strategy": "steps",
"save_total_limit": 1,
"seed": 100,
"split_validation_test": true,
"task_name": "mrpc",
"test_dataset_config_name": [
"en"
],
"test_dataset_name": "mrpc",
"tokenizer_name": "t5-base",
"warmup_steps": 500
}

View File

@ -1,42 +0,0 @@
{
"batch_size": 16,
"dataset_config_name": [
"en"
],
"delta_type": "bitfit",
"do_eval": true,
"do_test": true,
"do_train": true,
"eval_dataset_config_name": [
"en"
],
"eval_dataset_name": "mrpc",
"eval_steps": 200,
"evaluation_strategy": "steps",
"gradient_accumulation_steps": 1,
"greater_is_better": true,
"learning_rate": 0.000127337205276883,
"load_best_model_at_end": true,
"max_source_length": 128,
"max_steps": 5000,
"metric_for_best_model": "average_metrics",
"model_name_or_path": "t5-base",
"output_dir": "outputs_search/bitfit.mrpc.t5-base/96799644",
"overwrite_output_dir": true,
"per_device_eval_batch_size": 16,
"per_device_train_batch_size": 16,
"predict_with_generate": true,
"push_to_hub": false,
"save_steps": 200,
"save_strategy": "steps",
"save_total_limit": 1,
"seed": 100,
"split_validation_test": true,
"task_name": "mrpc",
"test_dataset_config_name": [
"en"
],
"test_dataset_name": "mrpc",
"tokenizer_name": "t5-base",
"warmup_steps": 500
}

View File

@ -1,42 +0,0 @@
{
"batch_size": 64,
"dataset_config_name": [
"en"
],
"delta_type": "bitfit",
"do_eval": true,
"do_test": true,
"do_train": true,
"eval_dataset_config_name": [
"en"
],
"eval_dataset_name": "mrpc",
"eval_steps": 200,
"evaluation_strategy": "steps",
"gradient_accumulation_steps": 2,
"greater_is_better": true,
"learning_rate": 0.017304287780519442,
"load_best_model_at_end": true,
"max_source_length": 128,
"max_steps": 5000,
"metric_for_best_model": "average_metrics",
"model_name_or_path": "t5-base",
"output_dir": "outputs_search/bitfit.mrpc.t5-base/97118516",
"overwrite_output_dir": true,
"per_device_eval_batch_size": 64,
"per_device_train_batch_size": 64,
"predict_with_generate": true,
"push_to_hub": false,
"save_steps": 200,
"save_strategy": "steps",
"save_total_limit": 1,
"seed": 100,
"split_validation_test": true,
"task_name": "mrpc",
"test_dataset_config_name": [
"en"
],
"test_dataset_name": "mrpc",
"tokenizer_name": "t5-base",
"warmup_steps": 500
}

View File

@ -1,42 +0,0 @@
{
"batch_size": 64,
"dataset_config_name": [
"en"
],
"delta_type": "bitfit",
"do_eval": true,
"do_test": true,
"do_train": true,
"eval_dataset_config_name": [
"en"
],
"eval_dataset_name": "mrpc",
"eval_steps": 200,
"evaluation_strategy": "steps",
"gradient_accumulation_steps": 1,
"greater_is_better": true,
"learning_rate": 0.057233123182472576,
"load_best_model_at_end": true,
"max_source_length": 128,
"max_steps": 5000,
"metric_for_best_model": "average_metrics",
"model_name_or_path": "t5-base",
"output_dir": "outputs_search/bitfit.mrpc.t5-base/97177600",
"overwrite_output_dir": true,
"per_device_eval_batch_size": 64,
"per_device_train_batch_size": 64,
"predict_with_generate": true,
"push_to_hub": false,
"save_steps": 200,
"save_strategy": "steps",
"save_total_limit": 1,
"seed": 100,
"split_validation_test": true,
"task_name": "mrpc",
"test_dataset_config_name": [
"en"
],
"test_dataset_name": "mrpc",
"tokenizer_name": "t5-base",
"warmup_steps": 500
}

View File

@ -1,42 +0,0 @@
{
"batch_size": 64,
"dataset_config_name": [
"en"
],
"delta_type": "bitfit",
"do_eval": true,
"do_test": true,
"do_train": true,
"eval_dataset_config_name": [
"en"
],
"eval_dataset_name": "mrpc",
"eval_steps": 200,
"evaluation_strategy": "steps",
"gradient_accumulation_steps": 1,
"greater_is_better": true,
"learning_rate": 0.041620230849224296,
"load_best_model_at_end": true,
"max_source_length": 128,
"max_steps": 5000,
"metric_for_best_model": "average_metrics",
"model_name_or_path": "t5-base",
"output_dir": "outputs_search/bitfit.mrpc.t5-base/97660529",
"overwrite_output_dir": true,
"per_device_eval_batch_size": 64,
"per_device_train_batch_size": 64,
"predict_with_generate": true,
"push_to_hub": false,
"save_steps": 200,
"save_strategy": "steps",
"save_total_limit": 1,
"seed": 100,
"split_validation_test": true,
"task_name": "mrpc",
"test_dataset_config_name": [
"en"
],
"test_dataset_name": "mrpc",
"tokenizer_name": "t5-base",
"warmup_steps": 500
}

View File

@ -1,42 +0,0 @@
{
"batch_size": 64,
"dataset_config_name": [
"en"
],
"delta_type": "bitfit",
"do_eval": true,
"do_test": true,
"do_train": true,
"eval_dataset_config_name": [
"en"
],
"eval_dataset_name": "mrpc",
"eval_steps": 200,
"evaluation_strategy": "steps",
"gradient_accumulation_steps": 2,
"greater_is_better": true,
"learning_rate": 0.0005420479832650441,
"load_best_model_at_end": true,
"max_source_length": 128,
"max_steps": 5000,
"metric_for_best_model": "average_metrics",
"model_name_or_path": "t5-base",
"output_dir": "outputs_search/bitfit.mrpc.t5-base/98459622",
"overwrite_output_dir": true,
"per_device_eval_batch_size": 64,
"per_device_train_batch_size": 64,
"predict_with_generate": true,
"push_to_hub": false,
"save_steps": 200,
"save_strategy": "steps",
"save_total_limit": 1,
"seed": 100,
"split_validation_test": true,
"task_name": "mrpc",
"test_dataset_config_name": [
"en"
],
"test_dataset_name": "mrpc",
"tokenizer_name": "t5-base",
"warmup_steps": 500
}

View File

@ -1,42 +0,0 @@
{
"batch_size": 32,
"dataset_config_name": [
"en"
],
"delta_type": "bitfit",
"do_eval": true,
"do_test": true,
"do_train": true,
"eval_dataset_config_name": [
"en"
],
"eval_dataset_name": "mrpc",
"eval_steps": 200,
"evaluation_strategy": "steps",
"gradient_accumulation_steps": 1,
"greater_is_better": true,
"learning_rate": 0.0026938134462562973,
"load_best_model_at_end": true,
"max_source_length": 128,
"max_steps": 5000,
"metric_for_best_model": "average_metrics",
"model_name_or_path": "t5-base",
"output_dir": "outputs_search/bitfit.mrpc.t5-base/99566760",
"overwrite_output_dir": true,
"per_device_eval_batch_size": 32,
"per_device_train_batch_size": 32,
"predict_with_generate": true,
"push_to_hub": false,
"save_steps": 200,
"save_strategy": "steps",
"save_total_limit": 1,
"seed": 100,
"split_validation_test": true,
"task_name": "mrpc",
"test_dataset_config_name": [
"en"
],
"test_dataset_name": "mrpc",
"tokenizer_name": "t5-base",
"warmup_steps": 0
}

View File

@ -1,42 +0,0 @@
{
"batch_size": 64,
"dataset_config_name": [
"en"
],
"delta_type": "bitfit",
"do_eval": true,
"do_test": true,
"do_train": true,
"eval_dataset_config_name": [
"en"
],
"eval_dataset_name": "mrpc",
"eval_steps": 200,
"evaluation_strategy": "steps",
"gradient_accumulation_steps": 2,
"greater_is_better": true,
"learning_rate": 0.00702408842393251,
"load_best_model_at_end": true,
"max_source_length": 128,
"max_steps": 5000,
"metric_for_best_model": "average_metrics",
"model_name_or_path": "t5-base",
"output_dir": "outputs_search/bitfit.mrpc.t5-base/99826259",
"overwrite_output_dir": true,
"per_device_eval_batch_size": 64,
"per_device_train_batch_size": 64,
"predict_with_generate": true,
"push_to_hub": false,
"save_steps": 200,
"save_strategy": "steps",
"save_total_limit": 1,
"seed": 100,
"split_validation_test": true,
"task_name": "mrpc",
"test_dataset_config_name": [
"en"
],
"test_dataset_name": "mrpc",
"tokenizer_name": "t5-base",
"warmup_steps": 500
}

Binary file not shown (image removed; 103 KiB).

Binary file not shown (image removed; 186 KiB).

Binary file not shown (image removed; 34 KiB).

Binary file not shown (image removed; 56 KiB).

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

@ -1,42 +0,0 @@
{
"batch_size": 64,
"dataset_config_name": [
"en"
],
"delta_type": "bitfit",
"do_eval": true,
"do_test": true,
"do_train": true,
"eval_dataset_config_name": [
"en"
],
"eval_dataset_name": "rte",
"eval_steps": 200,
"evaluation_strategy": "steps",
"gradient_accumulation_steps": 2,
"greater_is_better": true,
"learning_rate": 1.1032607780913182e-05,
"load_best_model_at_end": true,
"max_source_length": 128,
"max_steps": 5000,
"metric_for_best_model": "average_metrics",
"model_name_or_path": "t5-base",
"output_dir": "outputs_search/bitfit.rte.t5-base/1123702",
"overwrite_output_dir": true,
"per_device_eval_batch_size": 64,
"per_device_train_batch_size": 64,
"predict_with_generate": true,
"push_to_hub": false,
"save_steps": 200,
"save_strategy": "steps",
"save_total_limit": 1,
"seed": 100,
"split_validation_test": true,
"task_name": "rte",
"test_dataset_config_name": [
"en"
],
"test_dataset_name": "rte",
"tokenizer_name": "t5-base",
"warmup_steps": 500
}

View File

@ -1,42 +0,0 @@
{
"batch_size": 64,
"dataset_config_name": [
"en"
],
"delta_type": "bitfit",
"do_eval": true,
"do_test": true,
"do_train": true,
"eval_dataset_config_name": [
"en"
],
"eval_dataset_name": "rte",
"eval_steps": 200,
"evaluation_strategy": "steps",
"gradient_accumulation_steps": 1,
"greater_is_better": true,
"learning_rate": 9.869021064463024e-05,
"load_best_model_at_end": true,
"max_source_length": 128,
"max_steps": 5000,
"metric_for_best_model": "average_metrics",
"model_name_or_path": "t5-base",
"output_dir": "outputs_search/bitfit.rte.t5-base/12173417",
"overwrite_output_dir": true,
"per_device_eval_batch_size": 64,
"per_device_train_batch_size": 64,
"predict_with_generate": true,
"push_to_hub": false,
"save_steps": 200,
"save_strategy": "steps",
"save_total_limit": 1,
"seed": 100,
"split_validation_test": true,
"task_name": "rte",
"test_dataset_config_name": [
"en"
],
"test_dataset_name": "rte",
"tokenizer_name": "t5-base",
"warmup_steps": 500
}

View File

@ -1,42 +0,0 @@
{
"batch_size": 64,
"dataset_config_name": [
"en"
],
"delta_type": "bitfit",
"do_eval": true,
"do_test": true,
"do_train": true,
"eval_dataset_config_name": [
"en"
],
"eval_dataset_name": "rte",
"eval_steps": 200,
"evaluation_strategy": "steps",
"gradient_accumulation_steps": 1,
"greater_is_better": true,
"learning_rate": 0.000913136097576348,
"load_best_model_at_end": true,
"max_source_length": 128,
"max_steps": 5000,
"metric_for_best_model": "average_metrics",
"model_name_or_path": "t5-base",
"output_dir": "outputs_search/bitfit.rte.t5-base/14983360",
"overwrite_output_dir": true,
"per_device_eval_batch_size": 64,
"per_device_train_batch_size": 64,
"predict_with_generate": true,
"push_to_hub": false,
"save_steps": 200,
"save_strategy": "steps",
"save_total_limit": 1,
"seed": 100,
"split_validation_test": true,
"task_name": "rte",
"test_dataset_config_name": [
"en"
],
"test_dataset_name": "rte",
"tokenizer_name": "t5-base",
"warmup_steps": 500
}

View File

@ -1,42 +0,0 @@
{
"batch_size": 64,
"dataset_config_name": [
"en"
],
"delta_type": "bitfit",
"do_eval": true,
"do_test": true,
"do_train": true,
"eval_dataset_config_name": [
"en"
],
"eval_dataset_name": "rte",
"eval_steps": 200,
"evaluation_strategy": "steps",
"gradient_accumulation_steps": 2,
"greater_is_better": true,
"learning_rate": 1.1605972169428286e-05,
"load_best_model_at_end": true,
"max_source_length": 128,
"max_steps": 5000,
"metric_for_best_model": "average_metrics",
"model_name_or_path": "t5-base",
"output_dir": "outputs_search/bitfit.rte.t5-base/17148549",
"overwrite_output_dir": true,
"per_device_eval_batch_size": 64,
"per_device_train_batch_size": 64,
"predict_with_generate": true,
"push_to_hub": false,
"save_steps": 200,
"save_strategy": "steps",
"save_total_limit": 1,
"seed": 100,
"split_validation_test": true,
"task_name": "rte",
"test_dataset_config_name": [
"en"
],
"test_dataset_name": "rte",
"tokenizer_name": "t5-base",
"warmup_steps": 500
}

View File

@ -1,42 +0,0 @@
{
"batch_size": 32,
"dataset_config_name": [
"en"
],
"delta_type": "bitfit",
"do_eval": true,
"do_test": true,
"do_train": true,
"eval_dataset_config_name": [
"en"
],
"eval_dataset_name": "rte",
"eval_steps": 200,
"evaluation_strategy": "steps",
"gradient_accumulation_steps": 1,
"greater_is_better": true,
"learning_rate": 2.8707127478048054e-05,
"load_best_model_at_end": true,
"max_source_length": 128,
"max_steps": 5000,
"metric_for_best_model": "average_metrics",
"model_name_or_path": "t5-base",
"output_dir": "outputs_search/bitfit.rte.t5-base/18069491",
"overwrite_output_dir": true,
"per_device_eval_batch_size": 32,
"per_device_train_batch_size": 32,
"predict_with_generate": true,
"push_to_hub": false,
"save_steps": 200,
"save_strategy": "steps",
"save_total_limit": 1,
"seed": 100,
"split_validation_test": true,
"task_name": "rte",
"test_dataset_config_name": [
"en"
],
"test_dataset_name": "rte",
"tokenizer_name": "t5-base",
"warmup_steps": 0
}

File diff suppressed because one or more lines are too long

View File

@ -1,42 +0,0 @@
{
"batch_size": 32,
"dataset_config_name": [
"en"
],
"delta_type": "bitfit",
"do_eval": true,
"do_test": true,
"do_train": true,
"eval_dataset_config_name": [
"en"
],
"eval_dataset_name": "rte",
"eval_steps": 200,
"evaluation_strategy": "steps",
"gradient_accumulation_steps": 1,
"greater_is_better": true,
"learning_rate": 0.000194974976225138,
"load_best_model_at_end": true,
"max_source_length": 128,
"max_steps": 5000,
"metric_for_best_model": "average_metrics",
"model_name_or_path": "t5-base",
"output_dir": "outputs_search/bitfit.rte.t5-base/20719975",
"overwrite_output_dir": true,
"per_device_eval_batch_size": 32,
"per_device_train_batch_size": 32,
"predict_with_generate": true,
"push_to_hub": false,
"save_steps": 200,
"save_strategy": "steps",
"save_total_limit": 1,
"seed": 100,
"split_validation_test": true,
"task_name": "rte",
"test_dataset_config_name": [
"en"
],
"test_dataset_name": "rte",
"tokenizer_name": "t5-base",
"warmup_steps": 0
}

View File

@ -1,42 +0,0 @@
{
"batch_size": 32,
"dataset_config_name": [
"en"
],
"delta_type": "bitfit",
"do_eval": true,
"do_test": true,
"do_train": true,
"eval_dataset_config_name": [
"en"
],
"eval_dataset_name": "rte",
"eval_steps": 200,
"evaluation_strategy": "steps",
"gradient_accumulation_steps": 1,
"greater_is_better": true,
"learning_rate": 0.03781286205477464,
"load_best_model_at_end": true,
"max_source_length": 128,
"max_steps": 5000,
"metric_for_best_model": "average_metrics",
"model_name_or_path": "t5-base",
"output_dir": "outputs_search/bitfit.rte.t5-base/26158876",
"overwrite_output_dir": true,
"per_device_eval_batch_size": 32,
"per_device_train_batch_size": 32,
"predict_with_generate": true,
"push_to_hub": false,
"save_steps": 200,
"save_strategy": "steps",
"save_total_limit": 1,
"seed": 100,
"split_validation_test": true,
"task_name": "rte",
"test_dataset_config_name": [
"en"
],
"test_dataset_name": "rte",
"tokenizer_name": "t5-base",
"warmup_steps": 0
}

View File

@ -1,42 +0,0 @@
{
"batch_size": 32,
"dataset_config_name": [
"en"
],
"delta_type": "bitfit",
"do_eval": true,
"do_test": true,
"do_train": true,
"eval_dataset_config_name": [
"en"
],
"eval_dataset_name": "rte",
"eval_steps": 200,
"evaluation_strategy": "steps",
"gradient_accumulation_steps": 1,
"greater_is_better": true,
"learning_rate": 0.0618810008699179,
"load_best_model_at_end": true,
"max_source_length": 128,
"max_steps": 5000,
"metric_for_best_model": "average_metrics",
"model_name_or_path": "t5-base",
"output_dir": "outputs_search/bitfit.rte.t5-base/28522034",
"overwrite_output_dir": true,
"per_device_eval_batch_size": 32,
"per_device_train_batch_size": 32,
"predict_with_generate": true,
"push_to_hub": false,
"save_steps": 200,
"save_strategy": "steps",
"save_total_limit": 1,
"seed": 100,
"split_validation_test": true,
"task_name": "rte",
"test_dataset_config_name": [
"en"
],
"test_dataset_name": "rte",
"tokenizer_name": "t5-base",
"warmup_steps": 500
}

View File

@ -1,42 +0,0 @@
{
"batch_size": 64,
"dataset_config_name": [
"en"
],
"delta_type": "bitfit",
"do_eval": true,
"do_test": true,
"do_train": true,
"eval_dataset_config_name": [
"en"
],
"eval_dataset_name": "rte",
"eval_steps": 200,
"evaluation_strategy": "steps",
"gradient_accumulation_steps": 2,
"greater_is_better": true,
"learning_rate": 6.262592496186088e-05,
"load_best_model_at_end": true,
"max_source_length": 128,
"max_steps": 5000,
"metric_for_best_model": "average_metrics",
"model_name_or_path": "t5-base",
"output_dir": "outputs_search/bitfit.rte.t5-base/29099149",
"overwrite_output_dir": true,
"per_device_eval_batch_size": 64,
"per_device_train_batch_size": 64,
"predict_with_generate": true,
"push_to_hub": false,
"save_steps": 200,
"save_strategy": "steps",
"save_total_limit": 1,
"seed": 100,
"split_validation_test": true,
"task_name": "rte",
"test_dataset_config_name": [
"en"
],
"test_dataset_name": "rte",
"tokenizer_name": "t5-base",
"warmup_steps": 500
}

File diff suppressed because one or more lines are too long

View File

@ -1,42 +0,0 @@
{
"batch_size": 64,
"dataset_config_name": [
"en"
],
"delta_type": "bitfit",
"do_eval": true,
"do_test": true,
"do_train": true,
"eval_dataset_config_name": [
"en"
],
"eval_dataset_name": "rte",
"eval_steps": 200,
"evaluation_strategy": "steps",
"gradient_accumulation_steps": 1,
"greater_is_better": true,
"learning_rate": 4.8538530604501934e-05,
"load_best_model_at_end": true,
"max_source_length": 128,
"max_steps": 5000,
"metric_for_best_model": "average_metrics",
"model_name_or_path": "t5-base",
"output_dir": "outputs_search/bitfit.rte.t5-base/30778533",
"overwrite_output_dir": true,
"per_device_eval_batch_size": 64,
"per_device_train_batch_size": 64,
"predict_with_generate": true,
"push_to_hub": false,
"save_steps": 200,
"save_strategy": "steps",
"save_total_limit": 1,
"seed": 100,
"split_validation_test": true,
"task_name": "rte",
"test_dataset_config_name": [
"en"
],
"test_dataset_name": "rte",
"tokenizer_name": "t5-base",
"warmup_steps": 0
}

View File

@ -1,42 +0,0 @@
{
"batch_size": 16,
"dataset_config_name": [
"en"
],
"delta_type": "bitfit",
"do_eval": true,
"do_test": true,
"do_train": true,
"eval_dataset_config_name": [
"en"
],
"eval_dataset_name": "rte",
"eval_steps": 200,
"evaluation_strategy": "steps",
"gradient_accumulation_steps": 1,
"greater_is_better": true,
"learning_rate": 0.0009445961555576889,
"load_best_model_at_end": true,
"max_source_length": 128,
"max_steps": 5000,
"metric_for_best_model": "average_metrics",
"model_name_or_path": "t5-base",
"output_dir": "outputs_search/bitfit.rte.t5-base/33442523",
"overwrite_output_dir": true,
"per_device_eval_batch_size": 16,
"per_device_train_batch_size": 16,
"predict_with_generate": true,
"push_to_hub": false,
"save_steps": 200,
"save_strategy": "steps",
"save_total_limit": 1,
"seed": 100,
"split_validation_test": true,
"task_name": "rte",
"test_dataset_config_name": [
"en"
],
"test_dataset_name": "rte",
"tokenizer_name": "t5-base",
"warmup_steps": 500
}

View File

@ -1,42 +0,0 @@
{
"batch_size": 16,
"dataset_config_name": [
"en"
],
"delta_type": "bitfit",
"do_eval": true,
"do_test": true,
"do_train": true,
"eval_dataset_config_name": [
"en"
],
"eval_dataset_name": "rte",
"eval_steps": 200,
"evaluation_strategy": "steps",
"gradient_accumulation_steps": 1,
"greater_is_better": true,
"learning_rate": 0.020109951371648067,
"load_best_model_at_end": true,
"max_source_length": 128,
"max_steps": 5000,
"metric_for_best_model": "average_metrics",
"model_name_or_path": "t5-base",
"output_dir": "outputs_search/bitfit.rte.t5-base/35699804",
"overwrite_output_dir": true,
"per_device_eval_batch_size": 16,
"per_device_train_batch_size": 16,
"predict_with_generate": true,
"push_to_hub": false,
"save_steps": 200,
"save_strategy": "steps",
"save_total_limit": 1,
"seed": 100,
"split_validation_test": true,
"task_name": "rte",
"test_dataset_config_name": [
"en"
],
"test_dataset_name": "rte",
"tokenizer_name": "t5-base",
"warmup_steps": 500
}

View File

@ -1,42 +0,0 @@
{
"batch_size": 64,
"dataset_config_name": [
"en"
],
"delta_type": "bitfit",
"do_eval": true,
"do_test": true,
"do_train": true,
"eval_dataset_config_name": [
"en"
],
"eval_dataset_name": "rte",
"eval_steps": 200,
"evaluation_strategy": "steps",
"gradient_accumulation_steps": 2,
"greater_is_better": true,
"learning_rate": 6.0263760479697114e-05,
"load_best_model_at_end": true,
"max_source_length": 128,
"max_steps": 5000,
"metric_for_best_model": "average_metrics",
"model_name_or_path": "t5-base",
"output_dir": "outputs_search/bitfit.rte.t5-base/41924547",
"overwrite_output_dir": true,
"per_device_eval_batch_size": 64,
"per_device_train_batch_size": 64,
"predict_with_generate": true,
"push_to_hub": false,
"save_steps": 200,
"save_strategy": "steps",
"save_total_limit": 1,
"seed": 100,
"split_validation_test": true,
"task_name": "rte",
"test_dataset_config_name": [
"en"
],
"test_dataset_name": "rte",
"tokenizer_name": "t5-base",
"warmup_steps": 500
}

View File

@ -1,42 +0,0 @@
{
"batch_size": 64,
"dataset_config_name": [
"en"
],
"delta_type": "bitfit",
"do_eval": true,
"do_test": true,
"do_train": true,
"eval_dataset_config_name": [
"en"
],
"eval_dataset_name": "rte",
"eval_steps": 200,
"evaluation_strategy": "steps",
"gradient_accumulation_steps": 2,
"greater_is_better": true,
"learning_rate": 7.833953000267327e-05,
"load_best_model_at_end": true,
"max_source_length": 128,
"max_steps": 5000,
"metric_for_best_model": "average_metrics",
"model_name_or_path": "t5-base",
"output_dir": "outputs_search/bitfit.rte.t5-base/45992418",
"overwrite_output_dir": true,
"per_device_eval_batch_size": 64,
"per_device_train_batch_size": 64,
"predict_with_generate": true,
"push_to_hub": false,
"save_steps": 200,
"save_strategy": "steps",
"save_total_limit": 1,
"seed": 100,
"split_validation_test": true,
"task_name": "rte",
"test_dataset_config_name": [
"en"
],
"test_dataset_name": "rte",
"tokenizer_name": "t5-base",
"warmup_steps": 0
}

View File

@ -1,42 +0,0 @@
{
"batch_size": 64,
"dataset_config_name": [
"en"
],
"delta_type": "bitfit",
"do_eval": true,
"do_test": true,
"do_train": true,
"eval_dataset_config_name": [
"en"
],
"eval_dataset_name": "rte",
"eval_steps": 200,
"evaluation_strategy": "steps",
"gradient_accumulation_steps": 1,
"greater_is_better": true,
"learning_rate": 0.0005632466045355159,
"load_best_model_at_end": true,
"max_source_length": 128,
"max_steps": 5000,
"metric_for_best_model": "average_metrics",
"model_name_or_path": "t5-base",
"output_dir": "outputs_search/bitfit.rte.t5-base/46821674",
"overwrite_output_dir": true,
"per_device_eval_batch_size": 64,
"per_device_train_batch_size": 64,
"predict_with_generate": true,
"push_to_hub": false,
"save_steps": 200,
"save_strategy": "steps",
"save_total_limit": 1,
"seed": 100,
"split_validation_test": true,
"task_name": "rte",
"test_dataset_config_name": [
"en"
],
"test_dataset_name": "rte",
"tokenizer_name": "t5-base",
"warmup_steps": 500
}

View File

@ -1,42 +0,0 @@
{
"batch_size": 64,
"dataset_config_name": [
"en"
],
"delta_type": "bitfit",
"do_eval": true,
"do_test": true,
"do_train": true,
"eval_dataset_config_name": [
"en"
],
"eval_dataset_name": "rte",
"eval_steps": 200,
"evaluation_strategy": "steps",
"gradient_accumulation_steps": 1,
"greater_is_better": true,
"learning_rate": 0.008374542128252581,
"load_best_model_at_end": true,
"max_source_length": 128,
"max_steps": 5000,
"metric_for_best_model": "average_metrics",
"model_name_or_path": "t5-base",
"output_dir": "outputs_search/bitfit.rte.t5-base/47176009",
"overwrite_output_dir": true,
"per_device_eval_batch_size": 64,
"per_device_train_batch_size": 64,
"predict_with_generate": true,
"push_to_hub": false,
"save_steps": 200,
"save_strategy": "steps",
"save_total_limit": 1,
"seed": 100,
"split_validation_test": true,
"task_name": "rte",
"test_dataset_config_name": [
"en"
],
"test_dataset_name": "rte",
"tokenizer_name": "t5-base",
"warmup_steps": 0
}

View File

@ -1,42 +0,0 @@
{
"batch_size": 64,
"dataset_config_name": [
"en"
],
"delta_type": "bitfit",
"do_eval": true,
"do_test": true,
"do_train": true,
"eval_dataset_config_name": [
"en"
],
"eval_dataset_name": "rte",
"eval_steps": 200,
"evaluation_strategy": "steps",
"gradient_accumulation_steps": 2,
"greater_is_better": true,
"learning_rate": 7.170024484707928e-05,
"load_best_model_at_end": true,
"max_source_length": 128,
"max_steps": 5000,
"metric_for_best_model": "average_metrics",
"model_name_or_path": "t5-base",
"output_dir": "outputs_search/bitfit.rte.t5-base/47432895",
"overwrite_output_dir": true,
"per_device_eval_batch_size": 64,
"per_device_train_batch_size": 64,
"predict_with_generate": true,
"push_to_hub": false,
"save_steps": 200,
"save_strategy": "steps",
"save_total_limit": 1,
"seed": 100,
"split_validation_test": true,
"task_name": "rte",
"test_dataset_config_name": [
"en"
],
"test_dataset_name": "rte",
"tokenizer_name": "t5-base",
"warmup_steps": 500
}

View File

@ -1,42 +0,0 @@
{
"batch_size": 64,
"dataset_config_name": [
"en"
],
"delta_type": "bitfit",
"do_eval": true,
"do_test": true,
"do_train": true,
"eval_dataset_config_name": [
"en"
],
"eval_dataset_name": "rte",
"eval_steps": 200,
"evaluation_strategy": "steps",
"gradient_accumulation_steps": 1,
"greater_is_better": true,
"learning_rate": 0.00043437143218908386,
"load_best_model_at_end": true,
"max_source_length": 128,
"max_steps": 5000,
"metric_for_best_model": "average_metrics",
"model_name_or_path": "t5-base",
"output_dir": "outputs_search/bitfit.rte.t5-base/47615745",
"overwrite_output_dir": true,
"per_device_eval_batch_size": 64,
"per_device_train_batch_size": 64,
"predict_with_generate": true,
"push_to_hub": false,
"save_steps": 200,
"save_strategy": "steps",
"save_total_limit": 1,
"seed": 100,
"split_validation_test": true,
"task_name": "rte",
"test_dataset_config_name": [
"en"
],
"test_dataset_name": "rte",
"tokenizer_name": "t5-base",
"warmup_steps": 500
}

View File

@ -1,42 +0,0 @@
{
"batch_size": 64,
"dataset_config_name": [
"en"
],
"delta_type": "bitfit",
"do_eval": true,
"do_test": true,
"do_train": true,
"eval_dataset_config_name": [
"en"
],
"eval_dataset_name": "rte",
"eval_steps": 200,
"evaluation_strategy": "steps",
"gradient_accumulation_steps": 2,
"greater_is_better": true,
"learning_rate": 0.00010014550655645348,
"load_best_model_at_end": true,
"max_source_length": 128,
"max_steps": 5000,
"metric_for_best_model": "average_metrics",
"model_name_or_path": "t5-base",
"output_dir": "outputs_search/bitfit.rte.t5-base/49804166",
"overwrite_output_dir": true,
"per_device_eval_batch_size": 64,
"per_device_train_batch_size": 64,
"predict_with_generate": true,
"push_to_hub": false,
"save_steps": 200,
"save_strategy": "steps",
"save_total_limit": 1,
"seed": 100,
"split_validation_test": true,
"task_name": "rte",
"test_dataset_config_name": [
"en"
],
"test_dataset_name": "rte",
"tokenizer_name": "t5-base",
"warmup_steps": 500
}

View File

@ -1,42 +0,0 @@
{
"batch_size": 16,
"dataset_config_name": [
"en"
],
"delta_type": "bitfit",
"do_eval": true,
"do_test": true,
"do_train": true,
"eval_dataset_config_name": [
"en"
],
"eval_dataset_name": "rte",
"eval_steps": 200,
"evaluation_strategy": "steps",
"gradient_accumulation_steps": 1,
"greater_is_better": true,
"learning_rate": 6.867655291394631e-05,
"load_best_model_at_end": true,
"max_source_length": 128,
"max_steps": 5000,
"metric_for_best_model": "average_metrics",
"model_name_or_path": "t5-base",
"output_dir": "outputs_search/bitfit.rte.t5-base/52735972",
"overwrite_output_dir": true,
"per_device_eval_batch_size": 16,
"per_device_train_batch_size": 16,
"predict_with_generate": true,
"push_to_hub": false,
"save_steps": 200,
"save_strategy": "steps",
"save_total_limit": 1,
"seed": 100,
"split_validation_test": true,
"task_name": "rte",
"test_dataset_config_name": [
"en"
],
"test_dataset_name": "rte",
"tokenizer_name": "t5-base",
"warmup_steps": 500
}

View File

@ -1,42 +0,0 @@
{
"batch_size": 64,
"dataset_config_name": [
"en"
],
"delta_type": "bitfit",
"do_eval": true,
"do_test": true,
"do_train": true,
"eval_dataset_config_name": [
"en"
],
"eval_dataset_name": "rte",
"eval_steps": 200,
"evaluation_strategy": "steps",
"gradient_accumulation_steps": 2,
"greater_is_better": true,
"learning_rate": 1.1566052650322366e-05,
"load_best_model_at_end": true,
"max_source_length": 128,
"max_steps": 5000,
"metric_for_best_model": "average_metrics",
"model_name_or_path": "t5-base",
"output_dir": "outputs_search/bitfit.rte.t5-base/56419593",
"overwrite_output_dir": true,
"per_device_eval_batch_size": 64,
"per_device_train_batch_size": 64,
"predict_with_generate": true,
"push_to_hub": false,
"save_steps": 200,
"save_strategy": "steps",
"save_total_limit": 1,
"seed": 100,
"split_validation_test": true,
"task_name": "rte",
"test_dataset_config_name": [
"en"
],
"test_dataset_name": "rte",
"tokenizer_name": "t5-base",
"warmup_steps": 500
}

View File

@ -1,42 +0,0 @@
{
"batch_size": 32,
"dataset_config_name": [
"en"
],
"delta_type": "bitfit",
"do_eval": true,
"do_test": true,
"do_train": true,
"eval_dataset_config_name": [
"en"
],
"eval_dataset_name": "rte",
"eval_steps": 200,
"evaluation_strategy": "steps",
"gradient_accumulation_steps": 1,
"greater_is_better": true,
"learning_rate": 1.760953133010801e-05,
"load_best_model_at_end": true,
"max_source_length": 128,
"max_steps": 5000,
"metric_for_best_model": "average_metrics",
"model_name_or_path": "t5-base",
"output_dir": "outputs_search/bitfit.rte.t5-base/57722645",
"overwrite_output_dir": true,
"per_device_eval_batch_size": 32,
"per_device_train_batch_size": 32,
"predict_with_generate": true,
"push_to_hub": false,
"save_steps": 200,
"save_strategy": "steps",
"save_total_limit": 1,
"seed": 100,
"split_validation_test": true,
"task_name": "rte",
"test_dataset_config_name": [
"en"
],
"test_dataset_name": "rte",
"tokenizer_name": "t5-base",
"warmup_steps": 0
}

View File

@ -1,42 +0,0 @@
{
"batch_size": 64,
"dataset_config_name": [
"en"
],
"delta_type": "bitfit",
"do_eval": true,
"do_test": true,
"do_train": true,
"eval_dataset_config_name": [
"en"
],
"eval_dataset_name": "rte",
"eval_steps": 200,
"evaluation_strategy": "steps",
"gradient_accumulation_steps": 1,
"greater_is_better": true,
"learning_rate": 0.037536374095955345,
"load_best_model_at_end": true,
"max_source_length": 128,
"max_steps": 5000,
"metric_for_best_model": "average_metrics",
"model_name_or_path": "t5-base",
"output_dir": "outputs_search/bitfit.rte.t5-base/60994585",
"overwrite_output_dir": true,
"per_device_eval_batch_size": 64,
"per_device_train_batch_size": 64,
"predict_with_generate": true,
"push_to_hub": false,
"save_steps": 200,
"save_strategy": "steps",
"save_total_limit": 1,
"seed": 100,
"split_validation_test": true,
"task_name": "rte",
"test_dataset_config_name": [
"en"
],
"test_dataset_name": "rte",
"tokenizer_name": "t5-base",
"warmup_steps": 0
}

View File

@ -1,42 +0,0 @@
{
"batch_size": 32,
"dataset_config_name": [
"en"
],
"delta_type": "bitfit",
"do_eval": true,
"do_test": true,
"do_train": true,
"eval_dataset_config_name": [
"en"
],
"eval_dataset_name": "rte",
"eval_steps": 200,
"evaluation_strategy": "steps",
"gradient_accumulation_steps": 1,
"greater_is_better": true,
"learning_rate": 0.00020538230336950936,
"load_best_model_at_end": true,
"max_source_length": 128,
"max_steps": 5000,
"metric_for_best_model": "average_metrics",
"model_name_or_path": "t5-base",
"output_dir": "outputs_search/bitfit.rte.t5-base/63007050",
"overwrite_output_dir": true,
"per_device_eval_batch_size": 32,
"per_device_train_batch_size": 32,
"predict_with_generate": true,
"push_to_hub": false,
"save_steps": 200,
"save_strategy": "steps",
"save_total_limit": 1,
"seed": 100,
"split_validation_test": true,
"task_name": "rte",
"test_dataset_config_name": [
"en"
],
"test_dataset_name": "rte",
"tokenizer_name": "t5-base",
"warmup_steps": 0
}

View File

@ -1,42 +0,0 @@
{
"batch_size": 64,
"dataset_config_name": [
"en"
],
"delta_type": "bitfit",
"do_eval": true,
"do_test": true,
"do_train": true,
"eval_dataset_config_name": [
"en"
],
"eval_dataset_name": "rte",
"eval_steps": 200,
"evaluation_strategy": "steps",
"gradient_accumulation_steps": 1,
"greater_is_better": true,
"learning_rate": 0.0002561697332863371,
"load_best_model_at_end": true,
"max_source_length": 128,
"max_steps": 5000,
"metric_for_best_model": "average_metrics",
"model_name_or_path": "t5-base",
"output_dir": "outputs_search/bitfit.rte.t5-base/63359012",
"overwrite_output_dir": true,
"per_device_eval_batch_size": 64,
"per_device_train_batch_size": 64,
"predict_with_generate": true,
"push_to_hub": false,
"save_steps": 200,
"save_strategy": "steps",
"save_total_limit": 1,
"seed": 100,
"split_validation_test": true,
"task_name": "rte",
"test_dataset_config_name": [
"en"
],
"test_dataset_name": "rte",
"tokenizer_name": "t5-base",
"warmup_steps": 500
}

View File

@ -1,42 +0,0 @@
{
"batch_size": 32,
"dataset_config_name": [
"en"
],
"delta_type": "bitfit",
"do_eval": true,
"do_test": true,
"do_train": true,
"eval_dataset_config_name": [
"en"
],
"eval_dataset_name": "rte",
"eval_steps": 200,
"evaluation_strategy": "steps",
"gradient_accumulation_steps": 1,
"greater_is_better": true,
"learning_rate": 0.02044226389800505,
"load_best_model_at_end": true,
"max_source_length": 128,
"max_steps": 5000,
"metric_for_best_model": "average_metrics",
"model_name_or_path": "t5-base",
"output_dir": "outputs_search/bitfit.rte.t5-base/63678776",
"overwrite_output_dir": true,
"per_device_eval_batch_size": 32,
"per_device_train_batch_size": 32,
"predict_with_generate": true,
"push_to_hub": false,
"save_steps": 200,
"save_strategy": "steps",
"save_total_limit": 1,
"seed": 100,
"split_validation_test": true,
"task_name": "rte",
"test_dataset_config_name": [
"en"
],
"test_dataset_name": "rte",
"tokenizer_name": "t5-base",
"warmup_steps": 0
}

View File

@ -1,42 +0,0 @@
{
"batch_size": 64,
"dataset_config_name": [
"en"
],
"delta_type": "bitfit",
"do_eval": true,
"do_test": true,
"do_train": true,
"eval_dataset_config_name": [
"en"
],
"eval_dataset_name": "rte",
"eval_steps": 200,
"evaluation_strategy": "steps",
"gradient_accumulation_steps": 2,
"greater_is_better": true,
"learning_rate": 1.4137672005343143e-05,
"load_best_model_at_end": true,
"max_source_length": 128,
"max_steps": 5000,
"metric_for_best_model": "average_metrics",
"model_name_or_path": "t5-base",
"output_dir": "outputs_search/bitfit.rte.t5-base/64148506",
"overwrite_output_dir": true,
"per_device_eval_batch_size": 64,
"per_device_train_batch_size": 64,
"predict_with_generate": true,
"push_to_hub": false,
"save_steps": 200,
"save_strategy": "steps",
"save_total_limit": 1,
"seed": 100,
"split_validation_test": true,
"task_name": "rte",
"test_dataset_config_name": [
"en"
],
"test_dataset_name": "rte",
"tokenizer_name": "t5-base",
"warmup_steps": 500
}

View File

@ -1,42 +0,0 @@
{
"batch_size": 16,
"dataset_config_name": [
"en"
],
"delta_type": "bitfit",
"do_eval": true,
"do_test": true,
"do_train": true,
"eval_dataset_config_name": [
"en"
],
"eval_dataset_name": "rte",
"eval_steps": 200,
"evaluation_strategy": "steps",
"gradient_accumulation_steps": 1,
"greater_is_better": true,
"learning_rate": 0.08212873599011565,
"load_best_model_at_end": true,
"max_source_length": 128,
"max_steps": 5000,
"metric_for_best_model": "average_metrics",
"model_name_or_path": "t5-base",
"output_dir": "outputs_search/bitfit.rte.t5-base/68062044",
"overwrite_output_dir": true,
"per_device_eval_batch_size": 16,
"per_device_train_batch_size": 16,
"predict_with_generate": true,
"push_to_hub": false,
"save_steps": 200,
"save_strategy": "steps",
"save_total_limit": 1,
"seed": 100,
"split_validation_test": true,
"task_name": "rte",
"test_dataset_config_name": [
"en"
],
"test_dataset_name": "rte",
"tokenizer_name": "t5-base",
"warmup_steps": 500
}

View File

@ -1,42 +0,0 @@
{
"batch_size": 64,
"dataset_config_name": [
"en"
],
"delta_type": "bitfit",
"do_eval": true,
"do_test": true,
"do_train": true,
"eval_dataset_config_name": [
"en"
],
"eval_dataset_name": "rte",
"eval_steps": 200,
"evaluation_strategy": "steps",
"gradient_accumulation_steps": 1,
"greater_is_better": true,
"learning_rate": 0.00012472437804600788,
"load_best_model_at_end": true,
"max_source_length": 128,
"max_steps": 5000,
"metric_for_best_model": "average_metrics",
"model_name_or_path": "t5-base",
"output_dir": "outputs_search/bitfit.rte.t5-base/69091480",
"overwrite_output_dir": true,
"per_device_eval_batch_size": 64,
"per_device_train_batch_size": 64,
"predict_with_generate": true,
"push_to_hub": false,
"save_steps": 200,
"save_strategy": "steps",
"save_total_limit": 1,
"seed": 100,
"split_validation_test": true,
"task_name": "rte",
"test_dataset_config_name": [
"en"
],
"test_dataset_name": "rte",
"tokenizer_name": "t5-base",
"warmup_steps": 0
}

View File

@ -1,42 +0,0 @@
{
"batch_size": 64,
"dataset_config_name": [
"en"
],
"delta_type": "bitfit",
"do_eval": true,
"do_test": true,
"do_train": true,
"eval_dataset_config_name": [
"en"
],
"eval_dataset_name": "rte",
"eval_steps": 200,
"evaluation_strategy": "steps",
"gradient_accumulation_steps": 1,
"greater_is_better": true,
"learning_rate": 0.0004055014962899548,
"load_best_model_at_end": true,
"max_source_length": 128,
"max_steps": 5000,
"metric_for_best_model": "average_metrics",
"model_name_or_path": "t5-base",
"output_dir": "outputs_search/bitfit.rte.t5-base/69407180",
"overwrite_output_dir": true,
"per_device_eval_batch_size": 64,
"per_device_train_batch_size": 64,
"predict_with_generate": true,
"push_to_hub": false,
"save_steps": 200,
"save_strategy": "steps",
"save_total_limit": 1,
"seed": 100,
"split_validation_test": true,
"task_name": "rte",
"test_dataset_config_name": [
"en"
],
"test_dataset_name": "rte",
"tokenizer_name": "t5-base",
"warmup_steps": 500
}

View File

@ -1,42 +0,0 @@
{
"batch_size": 64,
"dataset_config_name": [
"en"
],
"delta_type": "bitfit",
"do_eval": true,
"do_test": true,
"do_train": true,
"eval_dataset_config_name": [
"en"
],
"eval_dataset_name": "rte",
"eval_steps": 200,
"evaluation_strategy": "steps",
"gradient_accumulation_steps": 2,
"greater_is_better": true,
"learning_rate": 0.0005726557355744265,
"load_best_model_at_end": true,
"max_source_length": 128,
"max_steps": 5000,
"metric_for_best_model": "average_metrics",
"model_name_or_path": "t5-base",
"output_dir": "outputs_search/bitfit.rte.t5-base/71736541",
"overwrite_output_dir": true,
"per_device_eval_batch_size": 64,
"per_device_train_batch_size": 64,
"predict_with_generate": true,
"push_to_hub": false,
"save_steps": 200,
"save_strategy": "steps",
"save_total_limit": 1,
"seed": 100,
"split_validation_test": true,
"task_name": "rte",
"test_dataset_config_name": [
"en"
],
"test_dataset_name": "rte",
"tokenizer_name": "t5-base",
"warmup_steps": 500
}

View File

@ -1,42 +0,0 @@
{
"batch_size": 64,
"dataset_config_name": [
"en"
],
"delta_type": "bitfit",
"do_eval": true,
"do_test": true,
"do_train": true,
"eval_dataset_config_name": [
"en"
],
"eval_dataset_name": "rte",
"eval_steps": 200,
"evaluation_strategy": "steps",
"gradient_accumulation_steps": 1,
"greater_is_better": true,
"learning_rate": 0.00012167338692493834,
"load_best_model_at_end": true,
"max_source_length": 128,
"max_steps": 5000,
"metric_for_best_model": "average_metrics",
"model_name_or_path": "t5-base",
"output_dir": "outputs_search/bitfit.rte.t5-base/71901921",
"overwrite_output_dir": true,
"per_device_eval_batch_size": 64,
"per_device_train_batch_size": 64,
"predict_with_generate": true,
"push_to_hub": false,
"save_steps": 200,
"save_strategy": "steps",
"save_total_limit": 1,
"seed": 100,
"split_validation_test": true,
"task_name": "rte",
"test_dataset_config_name": [
"en"
],
"test_dataset_name": "rte",
"tokenizer_name": "t5-base",
"warmup_steps": 500
}

Some files were not shown because too many files have changed in this diff.