Merge branch 'delta_center_dev' into check_pr_33

This commit is contained in:
DingDing 2022-10-10 13:06:38 +08:00 committed by GitHub
commit f6f3b01916
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
111 changed files with 2529 additions and 776 deletions

17
.gitignore vendored
View File

@ -35,4 +35,21 @@ log.txt
**/examples/examples_bmtrain/BMPretrain
**/examples/examples_bmtrain/BigModels/BigModels/results
**/Delta_Memory/
**/output/
**/thunlp/
**/saved_ckpts/
DeltaCenter-Python-Client/
backbone_structure
delta_checkpoints
gitop.sh
load_dataset_and_model.ipynb
load_model.py
scripts
t.py
t.sh
!examples/examples_prompt/configs/*/*.json
!examples/examples_prompt/configs/**
**/delta_checkpoints/
**/outputs/

View File

@ -72,6 +72,11 @@ python setup.py install
python setup.py develop
```
If you encounter a network error when using setup.py, please first install the dependencies via
```shell
pip install -r requirements.txt && python setup.py develop
```
## Must Try
```python

BIN
dist/opendelta-0.2.0-py3-none-any.whl vendored Normal file

Binary file not shown.

BIN
dist/opendelta-0.2.0.tar.gz vendored Normal file

Binary file not shown.

BIN
dist/opendelta-0.2.1-py3-none-any.whl vendored Normal file

Binary file not shown.

BIN
dist/opendelta-0.2.1.tar.gz vendored Normal file

Binary file not shown.

BIN
dist/opendelta-0.2.2-py3-none-any.whl vendored Normal file

Binary file not shown.

BIN
dist/opendelta-0.2.2.tar.gz vendored Normal file

Binary file not shown.

BIN
dist/opendelta-0.2.3-py3-none-any.whl vendored Normal file

Binary file not shown.

BIN
dist/opendelta-0.2.3.tar.gz vendored Normal file

Binary file not shown.

BIN
dist/opendelta-0.2.4-py3-none-any.whl vendored Normal file

Binary file not shown.

BIN
dist/opendelta-0.2.4.tar.gz vendored Normal file

Binary file not shown.

View File

@ -1,24 +1,59 @@
# !!!! This example collection is still under development; please wait before using it.
# Examples of using opendelta together with 🤗 transformers.
## Install the repo
In this repo, we construct a very general pipeline to train and test a PLM using
🤗 transformers.
The pipeline was constructed together with [openpromptu](https://pypi.org/project/openpromptu/), a lightweight,
model-agnostic version of [openprompt](https://github.com/thunlp/OpenPrompt).
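For a concrete sense of the wiring, here is a minimal sketch of how a template, verbalizer, and tokenizer wrapper from openpromptu are combined with a 🤗 tokenizer, following the `get_prompts`/`preprocess_function` pattern of the backbone files in this repo; the template text is borrowed from the Wikitext task, and the exact shape of `raw_example` depends on the task.
```python
from openpromptu import TokenizerWrapper
from openpromptu.data_utils import InputExample
from openpromptu.prompts import GenerationVerbalizer, ManualTemplate
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("t5-base")

def mask_token_func(tokenizer, ith_mask=0):
    return tokenizer.pad_token

template = ManualTemplate(text="""{"meta":"text"}""")   # template text taken from the Wikitext task
verbalizer = GenerationVerbalizer(tokenizer=tokenizer, classes=None, label_words=None)
tokenizer_wrapper = TokenizerWrapper(max_seq_length=256, tokenizer=tokenizer,
                                     truncate_method="tail", mask_token_func=mask_token_func)

def preprocess_one(raw_example):
    # raw_example is assumed to map onto InputExample's fields (e.g. a "meta" dict with "text").
    example = InputExample(**raw_example)
    example, other = template.wrap_one_example(example)   # fill the template slots
    input_sentence = tokenizer_wrapper.merge_wrapped_example(example)
    return tokenizer(input_sentence, max_length=256,
                     padding="max_length", truncation=True)
```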
## Pool of PLMs
We are going to adapt most of the models in 🤗 transformers
in this repo. The model-specific pipelines, processing steps, and configurations are specified
in `./backbones/`. You can add your own model there to support customized models; a minimal sketch of the expected interface is given below.
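The sketch below shows the handful of names that `src/run.py` imports from each backbone file. The bodies are illustrative placeholders for a generic classification model, not the repo's actual implementations, so treat it as a starting point.
```python
# ./backbones/my_model.py -- a hypothetical skeleton of the interface used by src/run.py.
from transformers import AutoConfig, AutoModelForSequenceClassification, AutoTokenizer
from transformers import Trainer as HfTrainer
from transformers.data.data_collator import DataCollatorMixin as HfDataCollatorMixin

def get_backbone(model_args, **kwargs):
    # Load the config/tokenizer/model for this model family.
    config = AutoConfig.from_pretrained(model_args.model_name_or_path)
    tokenizer = AutoTokenizer.from_pretrained(model_args.model_name_or_path)
    model = AutoModelForSequenceClassification.from_pretrained(
        model_args.model_name_or_path, config=config)
    return config, tokenizer, model

def preprocess_function(raw_example, **kwargs):
    # Map one raw dataset row to model inputs; the "sentence"/"label" fields are placeholders.
    tokenizer = kwargs["tokenizer"]
    model_inputs = tokenizer(raw_example["sentence"], max_length=256,
                             padding="max_length", truncation=True)
    model_inputs["labels"] = raw_example["label"]
    return model_inputs

def mask_token_func(tokenizer, ith_mask=0):
    return tokenizer.mask_token

def get_remove_columns(dataset_features):
    # Columns to drop from the dataset after preprocessing.
    return dataset_features

def get_prompts(task, tokenizer, data_args, template_id="0", verbalizer_id="0"):
    # Build the (template, verbalizer, tokenizer_wrapper) triple for prompt-style training.
    ...

class Trainer(HfTrainer):
    pass

class DataCollator(HfDataCollatorMixin):
    pass
```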
### An example script to run the repo in offline mode
```bash
cd ../
python setup_seq2seq.py develop
conda activate [YOURENV]
PATHBASE=[YOURPATH]
JOBNAME="adapter_t5-base"
DATASET="superglue-cb"
cd $PATHBASE/OpenDelta/examples/examples_prompt/
python configs/gen_t5.py --job $JOBNAME
export TRANSFORMERS_OFFLINE=1
export HF_DATASETS_OFFLINE=1
python src/run.py configs/$JOBNAME/$DATASET.json \
--model_name_or_path [YOURPATH_TO_T5_BASE] \
--tokenizer_name [YOURPATH_TO_T5_BASE] \
--datasets_saved_path [YOURPATH_TO_CB_DATASETS] \
--finetuned_delta_path ${PATHBASE}/delta_checkpoints/ \
--num_train_epochs 20 \
--bottleneck_dim 24 \
--delay_push True
```
This will add `examples_seq2seq` to the Python environment path.
## Generating the json configuration file
```shell
python configs/gen_$BACKBONETYPE.py --job $YOURJOB
#e.g. python configs/gen_beit.py --job lora_beit-base-patch16-224
```
The available job configurations (e.g., `--job lora_beit-base-patch16-224`) can be found in the scripts. You can also
create your own configuration.
## An example of quickly testing the repo
```bash
conda activate [YOURENV]
PATHBASE=[YOURPATH]
JOBNAME="adapter_t5-base"
DATASET="superglue-cb"
cd $PATHBASE/OpenDelta/examples/examples_prompt/
```
## Run the code
```bash
CUDA_VISIBLE_DEVICES=1 python src/run.py configs/lora_beit-base-patch16-224/beans.json
```
```bash
export TRANSFORMERS_OFFLINE=1
export HF_DATASETS_OFFLINE=1
export DELTACENTER_OFFLINE=0
python src/test.py configs/$JOBNAME/$DATASET.json \
  --model_name_or_path [YOURPATH_TO_T5_BASE] \
  --tokenizer_name [YOURPATH_TO_T5_BASE] \
  --datasets_saved_path [YOURPATH_TO_CB_DATASETS] \
  --finetuned_delta_path thunlp/t5-base_adapter_superglue-cb_20220701171436c80 \
  --delta_cache_dir "./delta_checkpoints/" \
  --force_download True
```

View File

@ -26,14 +26,14 @@ def preprocess_function(raw_example, **kwargs):
example = InputExample(**raw_example)
try:
example = verbalizer.wrap_one_example(example)
example, other = template.wrap_one_example(example)
input_sentence = tokenizer_wrapper.merge_wrapped_example(example)
model_inputs = tokenizer(input_sentence, max_length=256,
padding="max_length", truncation=True)
except:
from IPython import embed; embed(header="Therer")
example = verbalizer.wrap_one_example(example)
example, other = template.wrap_one_example(example)
input_sentence = tokenizer_wrapper.merge_wrapped_example(example)
model_inputs = tokenizer(input_sentence, max_length=256,
padding="max_length", truncation=True)
with tokenizer.as_target_tokenizer():
label = tokenizer(other['tgt_text']).input_ids
@ -43,7 +43,8 @@ def preprocess_function(raw_example, **kwargs):
def get_backbone(model_args, **kwargs):
config = AutoConfig.from_pretrained(
model_args.config_name if model_args.config_name else model_args.model_name_or_path,
# model_args.config_name if model_args.config_name else model_args.model_name_or_path,
model_args.model_name_or_path,
cache_dir=model_args.cache_dir,
revision=model_args.model_revision,
use_auth_token=True if model_args.use_auth_token else None,

View File

@ -8,7 +8,6 @@ from transformers import (
AutoFeatureExtractor,
AutoModelForImageClassification,
)
from transformers import ViTFeatureExtractor
from transformers import Trainer as HfTrainer
import torch.nn as nn
@ -26,9 +25,10 @@ def get_prompts(task, tokenizer, data_args, template_id="0", verbalizer_id="0"):
def preprocess_function(raw_example, **kwargs):
# from IPython import embed; embed(header="Therefa")
tokenizer = kwargs['tokenizer']
model_inputs = tokenizer(raw_example['image'], return_tensors='pt')
# print(np.array(raw_example['img']).shape)
model_inputs = tokenizer(np.array(raw_example['image']), return_tensors='pt')
model_inputs['pixel_values'] = model_inputs['pixel_values'].squeeze()
model_inputs['labels'] = raw_example['labels']
model_inputs['labels'] = raw_example['label']
return model_inputs
def compute_metrics(eval_preds, dataset_name, eval_metric):
@ -55,7 +55,7 @@ def mask_token_func(tokenizer, ith_mask=0):
def get_remove_columns(dataset_features):
# dataset_features.pop("label")
print("remove_columns: {}".format(dataset_features))
# print("remove_columns: {}".format(dataset_features))
return dataset_features
class DataCollator(HfDataCollatorMixin):

View File

@ -0,0 +1,169 @@
from openpromptu.data_utils import InputExample
import torch
from transformers.data.data_collator import torch_default_data_collator
from transformers.data.data_collator import DataCollatorMixin as HfDataCollatorMixin
from transformers.data.data_collator import DataCollatorForSeq2Seq as DataCollator
import numpy as np
from transformers import (
AutoConfig,
AutoModelForCausalLM,
AutoTokenizer,
)
from transformers import Seq2SeqTrainer as HfSeq2SeqTrainer
import copy
from torch.nn import CrossEntropyLoss
def preprocess_function(raw_example, **kwargs):
tokenizer = kwargs['tokenizer']
data_args = kwargs['data_args']
template = kwargs['template']
verbalizer = kwargs['verbalizer']
tokenizer_wrapper = kwargs['tokenizer_wrapper']
example = InputExample(**raw_example)
# example = verbalizer.wrap_one_example(example)
example, other = template.wrap_one_example(example)
input_sentence = tokenizer_wrapper.merge_wrapped_example(example)
model_inputs = tokenizer(input_sentence, max_length=data_args.max_source_length,
padding="max_length", truncation=True)
return model_inputs
def compute_metrics(eval_preds, dataset_name, eval_metric):
pass
def mask_token_func(tokenizer, ith_mask=0):
return tokenizer.pad_token
def get_remove_columns(dataset_features):
# dataset_features.remove("label")
return dataset_features
def get_prompts(task, tokenizer, data_args, template_id="0", verbalizer_id="0"):
from openpromptu.prompts import GenerationVerbalizer
from openpromptu.prompts import ManualTemplate
from openpromptu import TokenizerWrapper
template = ManualTemplate(text = task.templates_text[template_id])
verbalizer = GenerationVerbalizer(tokenizer=tokenizer, classes = None, label_words=None)
tokenizer_wrapper = TokenizerWrapper(max_seq_length=data_args.max_source_length, tokenizer=tokenizer, truncate_method="balanced", mask_token_func=mask_token_func)
return template, verbalizer, tokenizer_wrapper
def get_backbone(model_args, **kwargs):
config = AutoConfig.from_pretrained(
model_args.config_name if model_args.config_name else model_args.model_name_or_path,
cache_dir=model_args.cache_dir,
revision=model_args.model_revision,
use_auth_token=True if model_args.use_auth_token else None,
)
# config.dropout_rate = 0.0
tokenizer = AutoTokenizer.from_pretrained(
model_args.tokenizer_name if model_args.tokenizer_name else model_args.model_name_or_path,
cache_dir=model_args.cache_dir,
use_fast=model_args.use_fast_tokenizer,
revision=model_args.model_revision,
use_auth_token=True if model_args.use_auth_token else None,
)
model = AutoModelForCausalLM.from_pretrained(
model_args.model_name_or_path,
from_tf=bool(".ckpt" in model_args.model_name_or_path),
config=config,
cache_dir=model_args.cache_dir,
revision=model_args.model_revision,
use_auth_token=True if model_args.use_auth_token else None,
)
return config, tokenizer, model
class Trainer(HfSeq2SeqTrainer):
def __init__(self, verbalizer=None, eval_task=None, **kwargs):
super().__init__(**kwargs)
self.eval_task = eval_task
self.compute_metrics = self._compute_metrics
def compute_loss(self, model, inputs, return_outputs=False):
labels=copy.deepcopy(inputs['input_ids'])
# labels[labels==self.tokenizer.pad_token_id]=-100
outputs = model(**inputs)
logits = outputs.logits
shift_logits = logits[..., :-1, :].contiguous()
shift_labels = labels[..., 1:].contiguous()
loss_fct = CrossEntropyLoss(ignore_index=self.tokenizer.pad_token_id)
loss = loss_fct(shift_logits.view(-1, shift_logits.shape[-1]), shift_labels.long().view(-1))
return (loss, outputs) if return_outputs else loss
def prediction_step(
self,
model, #nn.Module,
inputs, #Dict[str, Union[torch.Tensor, Any]],
prediction_loss_only, #: bool,
ignore_keys, #: Optional[List[str]] = None,
): #-> Tuple[Optional[float], Optional[torch.Tensor], Optional[torch.Tensor]]:
"""
Perform an evaluation step on :obj:`model` using :obj:`inputs`.
Subclass and override to inject custom behavior.
Args:
model (:obj:`nn.Module`):
The model to evaluate.
inputs (:obj:`Dict[str, Union[torch.Tensor, Any]]`):
The inputs and targets of the model.
The dictionary will be unpacked before being fed to the model. Most models expect the targets under the
argument :obj:`labels`. Check your model's documentation for all accepted arguments.
prediction_loss_only (:obj:`bool`):
Whether or not to return the loss only.
Return:
Tuple[Optional[float], Optional[torch.Tensor], Optional[torch.Tensor]]: A tuple with the loss, logits and
labels (each being optional).
"""
if not self.args.predict_with_generate or prediction_loss_only:
return super().prediction_step(
model, inputs, prediction_loss_only=prediction_loss_only, ignore_keys=ignore_keys
)
inputs = self._prepare_inputs(inputs)
with torch.no_grad():
labels=copy.deepcopy(inputs['input_ids'])
# labels[labels==self.tokenizer.pad_token_id]=-100
outputs = model(**inputs)
logits = outputs.logits
shift_logits = logits[..., :-1, :].contiguous()
shift_labels = labels[..., 1:].contiguous().long()
loss_fct = CrossEntropyLoss(ignore_index=self.tokenizer.pad_token_id)
loss = loss_fct(shift_logits.view(-1, shift_logits.shape[-1]), shift_labels.view(-1)).detach().cpu()
loss = torch.where(torch.isnan(loss), torch.full_like(loss, 0), loss)
if prediction_loss_only:
return (loss, None, None)
else:
# non pad label
shift_labels = shift_labels.view(-1).detach().cpu()
nonpad_idx = shift_labels!=self.tokenizer.pad_token_id
shift_labels = shift_labels[nonpad_idx]
# the probability at the corresponding position
shift_logits = shift_logits.view(-1, shift_logits.shape[-1])[nonpad_idx].detach().cpu()
target_position = torch.nn.functional.one_hot(shift_labels,shift_logits.shape[-1]).bool().to(shift_labels.device)
shift_logits = shift_logits.softmax(dim=-1)[target_position]
return (loss, shift_logits, shift_labels)
def _compute_metrics(self, eval_preds):
preds, labels = eval_preds
result = {}
for metric in self.eval_task.metric:
result.update(metric(preds, labels,ignore_index=self.tokenizer.pad_token_id))
average_metric = sum(result.values())/len(result)
result.update({"average_metrics":average_metric})
return result
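# A standalone numeric sketch of the shift logic in compute_loss/prediction_step
# above (the tensors are made up; the one_hot/boolean indexing used there is
# equivalent to a gather over the gold next tokens):
import torch
logits = torch.randn(1, 4, 10)                 # (batch, seq_len, vocab)
labels = torch.tensor([[5, 2, 9, 1]])
shift_logits = logits[..., :-1, :]             # positions 0..2 predict tokens 1..3
shift_labels = labels[..., 1:]                 # the tokens actually at positions 1..3
gold_probs = shift_logits.softmax(-1).gather(-1, shift_labels.unsqueeze(-1)).squeeze(-1)
print(gold_probs.shape)                        # torch.Size([1, 3])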

View File

@ -26,14 +26,13 @@ def preprocess_function(raw_example, **kwargs):
example = InputExample(**raw_example)
try:
example = verbalizer.wrap_one_example(example)
example, other = template.wrap_one_example(example)
input_sentence = tokenizer_wrapper.merge_wrapped_example(example)
model_inputs = tokenizer(input_sentence, max_length=data_args.max_source_length,
padding="max_length", truncation=True)
except:
from IPython import embed; embed(header="Therer")
example = verbalizer.wrap_one_example(example)
example, other = template.wrap_one_example(example)
input_sentence = tokenizer_wrapper.merge_wrapped_example(example)
model_inputs = tokenizer(input_sentence, max_length=data_args.max_source_length,
padding="max_length", truncation=True)
with tokenizer.as_target_tokenizer():
label = tokenizer(other['tgt_text']).input_ids
@ -165,7 +164,7 @@ class Trainer(HfSeq2SeqTrainer):
return (loss, generated_tokens, labels)
def _compute_metrics(self, eval_preds):
from IPython import embed; embed(header="In compute metrics")
# from IPython import embed; embed(header="In compute metrics")
preds, labels = eval_preds
decoded_preds = self.tokenizer.batch_decode(preds, skip_special_tokens=True)
decoded_labels = self.tokenizer.batch_decode(labels, skip_special_tokens=True)

View File

@ -0,0 +1,171 @@
from openpromptu.data_utils import InputExample
import torch
from transformers.data.data_collator import torch_default_data_collator
from transformers.data.data_collator import DataCollatorMixin as HfDataCollatorMixin
from transformers.data.data_collator import DataCollatorForSeq2Seq as DataCollator
import numpy as np
from transformers import (
AutoConfig,
AutoModelForCausalLM,
AutoTokenizer,
)
from transformers import Seq2SeqTrainer as HfSeq2SeqTrainer
import copy
from torch.nn import CrossEntropyLoss
def preprocess_function(raw_example, **kwargs):
tokenizer = kwargs['tokenizer']
data_args = kwargs['data_args']
template = kwargs['template']
verbalizer = kwargs['verbalizer']
tokenizer_wrapper = kwargs['tokenizer_wrapper']
example = InputExample(**raw_example)
# example = verbalizer.wrap_one_example(example)
example, other = template.wrap_one_example(example)
input_sentence = tokenizer_wrapper.merge_wrapped_example(example)
model_inputs = tokenizer(input_sentence, max_length=data_args.max_source_length,
padding="max_length", truncation=True)
return model_inputs
def compute_metrics(eval_preds, dataset_name, eval_metric):
pass
def mask_token_func(tokenizer, ith_mask=0):
return tokenizer.pad_token
def get_remove_columns(dataset_features):
# dataset_features.remove("label")
return dataset_features
def get_prompts(task, tokenizer, data_args, template_id="0", verbalizer_id="0"):
from openpromptu.prompts import GenerationVerbalizer
from openpromptu.prompts import ManualTemplate
from openpromptu import TokenizerWrapper
template = ManualTemplate(text = task.templates_text[template_id])
verbalizer = GenerationVerbalizer(tokenizer=tokenizer, classes = None, label_words=None)
tokenizer_wrapper = TokenizerWrapper(max_seq_length=data_args.max_source_length, tokenizer=tokenizer, truncate_method="tail", mask_token_func=mask_token_func)
return template, verbalizer, tokenizer_wrapper
def get_backbone(model_args, **kwargs):
config = AutoConfig.from_pretrained(
model_args.config_name if model_args.config_name else model_args.model_name_or_path,
cache_dir=model_args.cache_dir,
revision=model_args.model_revision,
use_auth_token=True if model_args.use_auth_token else None,
)
# config.dropout_rate = 0.0
tokenizer = AutoTokenizer.from_pretrained(
model_args.tokenizer_name if model_args.tokenizer_name else model_args.model_name_or_path,
cache_dir=model_args.cache_dir,
use_fast=model_args.use_fast_tokenizer,
revision=model_args.model_revision,
use_auth_token=True if model_args.use_auth_token else None,
)
if not hasattr(tokenizer,"pad_token") or (hasattr(tokenizer,"pad_token") and tokenizer.pad_token==None):
tokenizer.pad_token = tokenizer.eos_token
model = AutoModelForCausalLM.from_pretrained(
model_args.model_name_or_path,
from_tf=bool(".ckpt" in model_args.model_name_or_path),
config=config,
cache_dir=model_args.cache_dir,
revision=model_args.model_revision,
use_auth_token=True if model_args.use_auth_token else None,
)
return config, tokenizer, model
class Trainer(HfSeq2SeqTrainer):
def __init__(self, verbalizer=None, eval_task=None, **kwargs):
super().__init__(**kwargs)
self.eval_task = eval_task
self.compute_metrics = self._compute_metrics
def compute_loss(self, model, inputs, return_outputs=False):
labels=copy.deepcopy(inputs['input_ids'])
# labels[labels==self.tokenizer.pad_token_id]=-100
outputs = model(**inputs)
logits = outputs.logits
shift_logits = logits[..., :-1, :].contiguous()
shift_labels = labels[..., 1:].contiguous()
loss_fct = CrossEntropyLoss(ignore_index=self.tokenizer.pad_token_id)
loss = loss_fct(shift_logits.view(-1, shift_logits.shape[-1]), shift_labels.long().view(-1))
return (loss, outputs) if return_outputs else loss
def prediction_step(
self,
model, #nn.Module,
inputs, #Dict[str, Union[torch.Tensor, Any]],
prediction_loss_only, #: bool,
ignore_keys, #: Optional[List[str]] = None,
): #-> Tuple[Optional[float], Optional[torch.Tensor], Optional[torch.Tensor]]:
"""
Perform an evaluation step on :obj:`model` using :obj:`inputs`.
Subclass and override to inject custom behavior.
Args:
model (:obj:`nn.Module`):
The model to evaluate.
inputs (:obj:`Dict[str, Union[torch.Tensor, Any]]`):
The inputs and targets of the model.
The dictionary will be unpacked before being fed to the model. Most models expect the targets under the
argument :obj:`labels`. Check your model's documentation for all accepted arguments.
prediction_loss_only (:obj:`bool`):
Whether or not to return the loss only.
Return:
Tuple[Optional[float], Optional[torch.Tensor], Optional[torch.Tensor]]: A tuple with the loss, logits and
labels (each being optional).
"""
if not self.args.predict_with_generate or prediction_loss_only:
return super().prediction_step(
model, inputs, prediction_loss_only=prediction_loss_only, ignore_keys=ignore_keys
)
inputs = self._prepare_inputs(inputs)
with torch.no_grad():
labels=copy.deepcopy(inputs['input_ids'])
# labels[labels==self.tokenizer.pad_token_id]=-100
outputs = model(**inputs)
logits = outputs.logits
shift_logits = logits[..., :-1, :].contiguous()
shift_labels = labels[..., 1:].contiguous().long()
loss_fct = CrossEntropyLoss(ignore_index=self.tokenizer.pad_token_id)
loss = loss_fct(shift_logits.view(-1, shift_logits.shape[-1]), shift_labels.view(-1)).detach().cpu()
loss = torch.where(torch.isnan(loss), torch.full_like(loss, 0), loss)
if prediction_loss_only:
return (loss, None, None)
else:
# non pad label
shift_labels = shift_labels.view(-1).detach().cpu()
nonpad_idx = shift_labels!=self.tokenizer.pad_token_id
shift_labels = shift_labels[nonpad_idx]
# the probability at the corresponding position
shift_logits = shift_logits.view(-1, shift_logits.shape[-1])[nonpad_idx].detach().cpu()
target_position = torch.nn.functional.one_hot(shift_labels,shift_logits.shape[-1]).bool().to(shift_labels.device)
shift_logits = shift_logits.softmax(dim=-1)[target_position]
return (loss, shift_logits, shift_labels)
def _compute_metrics(self, eval_preds):
preds, labels = eval_preds
result = {}
for metric in self.eval_task.metric:
result.update(metric(preds, labels,ignore_index=self.tokenizer.pad_token_id))
average_metric = sum(result.values())/len(result)
result.update({"average_metrics":average_metric})
return result

View File

@ -26,14 +26,13 @@ def preprocess_function(raw_example, **kwargs):
example = InputExample(**raw_example)
try:
example = verbalizer.wrap_one_example(example)
example, other = template.wrap_one_example(example)
input_sentence = tokenizer_wrapper.merge_wrapped_example(example)
model_inputs = tokenizer(input_sentence, max_length=256,
padding="max_length", truncation=True)
except:
from IPython import embed; embed(header="Therer")
example = verbalizer.wrap_one_example(example)
example, other = template.wrap_one_example(example)
input_sentence = tokenizer_wrapper.merge_wrapped_example(example)
model_inputs = tokenizer(input_sentence, max_length=256,
padding="max_length", truncation=True)
with tokenizer.as_target_tokenizer():
label = tokenizer(other['tgt_text']).input_ids

View File

@ -1,59 +0,0 @@
# the final results will be populated here.
{
"evaluate": {
"epoch": 20.0,
"eval_accuracy": 89.2156862745098,
"eval_average_metrics": 90.76168929110105,
"eval_f1": 92.3076923076923,
"eval_loss": 0.16493959724903107,
"eval_runtime": 1.6391,
"eval_samples_per_second": 124.455
},
"repo_name": "DeltaHub/bitfit_t5-base_mrpc",
"test": {
"epoch": 20.0,
"test_accuracy": 88.23529411764706,
"test_average_metrics": 89.97971602434077,
"test_f1": 91.72413793103448,
"test_loss": 0.14968213438987732,
"test_runtime": 1.6344,
"test_samples_per_second": 124.82
}
}
{
"evaluate": {
"epoch": 20.0,
"eval_average_metrics": 52.10265668831534,
"eval_loss": 0.3603779077529907,
"eval_matthews_correlation": 52.10265668831534,
"eval_runtime": 1.0808,
"eval_samples_per_second": 482.046
},
"repo_name": "DeltaHub/bitfit_t5-base_cola",
"test": {
"epoch": 20.0,
"test_average_metrics": 54.209563471221934,
"test_loss": 0.2853100299835205,
"test_matthews_correlation": 54.209563471221934,
"test_runtime": 1.056,
"test_samples_per_second": 494.304
}
}
{
"evaluate": {
"epoch": 20.0,
"eval_average_metrics": 53.80613287067274,
"eval_loss": 0.25723716616630554,
"eval_matthews_correlation": 53.80613287067274,
"eval_runtime": 1.0583,
"eval_samples_per_second": 492.299
},
"repo_name": "DeltaHub/bitfit_t5-base_cola",
"test": {
"epoch": 20.0,
"test_average_metrics": 54.32497579543861,
"test_loss": 0.22327613830566406,
"test_matthews_correlation": 54.32497579543861,
"test_runtime": 1.0556,
"test_samples_per_second": 494.507
}
}

View File

@ -0,0 +1,48 @@
{
"bottleneck_dim": 24,
"dataset_config_name": [
"en"
],
"delta_type": "adapter",
"do_eval": true,
"do_test": true,
"do_train": true,
"eval_dataset_config_name": [
"en"
],
"eval_dataset_name": "beans",
"eval_steps": 200,
"evaluation_strategy": "steps",
"greater_is_better": true,
"learning_rate": 0.0003,
"load_best_model_at_end": true,
"max_source_length": 256,
"metric_for_best_model": "average_metrics",
"model_name_or_path": "/mnt/sfs_turbo/hsd/plm_cache/clip-vit-base-patch32",
"num_classes": 3,
"num_train_epochs": 20,
"output_dir": "outputs/adapter/clip-vit-base-patch32/beans",
"overwrite_output_dir": true,
"per_device_eval_batch_size": 32,
"per_device_train_batch_size": 32,
"predict_with_generate": true,
"push_to_delta_center": true,
"push_to_hub": false,
"save_steps": 200,
"save_strategy": "steps",
"save_total_limit": 1,
"seed": 42,
"split_validation_test": true,
"task_name": "beans",
"test_dataset_config_name": [
"en"
],
"test_dataset_name": "beans",
"tokenizer_name": "/mnt/sfs_turbo/hsd/plm_cache/clip-vit-base-patch32",
"unfrozen_modules": [
"deltas",
"layer_norm",
"final_layer_norm"
],
"warmup_steps": 0
}

View File

@ -0,0 +1,53 @@
{
"backbone_model": "opt",
"bottleneck_dim": 24,
"dataset_config_name": [
"en"
],
"datasets_load_from_disk": true,
"datasets_saved_path": "/mnt/sfs_turbo/hsd/huggingface_datasets/saved_to_disk/",
"delta_type": "adapter",
"do_eval": true,
"do_test": true,
"do_train": true,
"eval_dataset_config_name": [
"en"
],
"eval_dataset_name": "wikitext",
"eval_steps": 200,
"evaluation_strategy": "steps",
"gradient_accumulation_steps":2,
"greater_is_better": false,
"learning_rate": 0.0003,
"load_best_model_at_end": true,
"max_source_length": 900,
"metric_for_best_model": "average_metrics",
"model_name_or_path": "/mnt/sfs_turbo/hsd/plm_cache/opt-350m",
"model_path_public": "opt-350m",
"num_train_epochs": 3,
"output_dir": "outputs/adapter/opt-350m/wikitext",
"overwrite_output_dir": true,
"per_device_eval_batch_size": 6,
"per_device_train_batch_size": 6,
"predict_with_generate": true,
"push_to_dc": true,
"push_to_hf": false,
"save_steps": 200,
"save_strategy": "steps",
"save_total_limit": 1,
"seed": 42,
"split_validation_test": true,
"task_name": "wikitext",
"test_dataset_config_name": [
"en"
],
"test_dataset_name": "wikitext",
"tokenizer_name": "/mnt/sfs_turbo/hsd/plm_cache/opt-350m",
"unfrozen_modules": [
"deltas",
"layer_norm",
"final_layer_norm"
],
"warmup_steps": 0,
"modified_modules":["self_attn"]
}

View File

@ -0,0 +1,53 @@
{
"backbone_model": "vit",
"bottleneck_dim": 24,
"dataset_config_name": [
"en"
],
"datasets_load_from_disk": false,
"datasets_saved_path": "/mnt/sfs_turbo/hsd/huggingface_datasets/saved_to_disk/",
"delta_type": "adapter",
"do_eval": true,
"do_test": true,
"do_train": true,
"eval_dataset_config_name": [
"en"
],
"eval_dataset_name": "beans",
"eval_steps": 200,
"evaluation_strategy": "steps",
"greater_is_better": true,
"learning_rate": 0.0003,
"load_best_model_at_end": true,
"max_source_length": 256,
"metric_for_best_model": "average_metrics",
"model_name_or_path": "/mnt/sfs_turbo/hsd/plm_cache/vit-large-patch16-224-in21k",
"model_path_public": "vit-large-patch16-224-in21k",
"num_classes": 3,
"num_train_epochs": 20,
"output_dir": "outputs/adapter/vit-large-patch16-224-in21k/beans",
"overwrite_output_dir": true,
"per_device_eval_batch_size": 32,
"per_device_train_batch_size": 32,
"predict_with_generate": false,
"push_to_dc": true,
"push_to_hf": false,
"save_steps": 200,
"save_strategy": "steps",
"save_total_limit": 1,
"seed": 42,
"split_validation_test": true,
"task_name": "beans",
"test_dataset_config_name": [
"en"
],
"test_dataset_name": "beans",
"tokenizer_name": "/mnt/sfs_turbo/hsd/plm_cache/vit-large-patch16-224-in21k",
"unfrozen_modules": [
"deltas",
"layer_norm",
"final_layer_norm"
],
"warmup_steps": 0,
"modified_modules":["output"]
}

View File

@ -0,0 +1,51 @@
{
"backbone_model": "t5-large",
"dataset_config_name": [
"en"
],
"datasets_load_from_disk": true,
"datasets_saved_path": "/mnt/sfs_turbo/hsd/huggingface_datasets/saved_to_disk/",
"delta_type": "bitfit",
"do_eval": true,
"do_test": true,
"do_train": true,
"eval_dataset_config_name": [
"en"
],
"eval_dataset_name": "rte",
"eval_steps": 100,
"evaluation_strategy": "steps",
"greater_is_better": true,
"learning_rate": 0.0003,
"load_best_model_at_end": true,
"max_source_length": 256,
"metric_for_best_model": "average_metrics",
"model_name_or_path": "/mnt/sfs_turbo/hsd/plm_cache/t5-large",
"model_path_public": "t5-large",
"num_train_epochs": 20,
"output_dir": "outputs/bitfit/t5-large/rte",
"overwrite_output_dir": true,
"per_device_eval_batch_size": 16,
"per_device_train_batch_size": 16,
"predict_with_generate": true,
"push_to_dc": true,
"push_to_hf": false,
"save_steps": 100,
"save_strategy": "steps",
"save_total_limit": 1,
"seed": 42,
"split_validation_test": true,
"task_name": "rte",
"test_dataset_config_name": [
"en"
],
"test_dataset_name": "rte",
"tokenizer_name": "/mnt/sfs_turbo/hsd/plm_cache/t5-large",
"unfrozen_modules": [
"deltas",
"layer_norm",
"final_layer_norm"
],
"warmup_steps": 0,
"modified_modules":["attn", "ff", "layer_norm"]
}

View File

@ -0,0 +1,66 @@
{
"backbone_model": "blenderbot",
"dataset_config_name": [
"en"
],
"datasets_load_from_disk": true,
"datasets_saved_path": "/mnt/sfs_turbo/hsd/huggingface_datasets/saved_to_disk/",
"delta_type": "compacter",
"do_eval": true,
"do_test": true,
"do_train": true,
"eval_dataset_config_name": [
"en"
],
"eval_dataset_name": "sst2",
"eval_steps": 200,
"evaluation_strategy": "steps",
"factorized_phm": true,
"factorized_phm_rule": false,
"gradient_clip": false,
"greater_is_better": true,
"hypercomplex_adapters": true,
"hypercomplex_division": 4,
"hypercomplex_nonlinearity": "glorot-uniform",
"learn_phm": true,
"learning_rate": 0.003,
"load_best_model_at_end": true,
"max_source_length": 128,
"metric_for_best_model": "average_metrics",
"model_name_or_path": "/mnt/sfs_turbo/hsd/plm_cache/blenderbot-3b",
"model_path_public": "blenderbot-3b",
"non_linearity": "gelu_new",
"normalize_phm_weight": false,
"num_train_epochs": 3,
"output_dir": "outputs/compacter/blenderbot-3b/sst2",
"overwrite_output_dir": true,
"per_device_eval_batch_size": 32,
"per_device_train_batch_size": 32,
"phm_c_init": "normal",
"phm_clamp": false,
"phm_init_range": 0.0001,
"predict_with_generate": true,
"push_to_dc": true,
"push_to_hf": false,
"save_steps": 200,
"save_strategy": "steps",
"save_total_limit": 1,
"seed": 42,
"shared_phm_rule": false,
"split_validation_test": true,
"task_name": "sst2",
"test_dataset_config_name": [
"en"
],
"test_dataset_name": "sst2",
"tokenizer_name": "/mnt/sfs_turbo/hsd/plm_cache/blenderbot-3b",
"unfrozen_modules": [
"deltas",
"layer_norm",
"final_layer_norm"
],
"use_bias_down_sampler": true,
"use_bias_up_sampler": true,
"warmup_steps": 0,
"modified_modules":["fc2"]
}

View File

@ -0,0 +1,51 @@
{
"backbone_model": "deberta-v2-xlarge",
"dataset_config_name": [
"en"
],
"datasets_load_from_disk": true,
"datasets_saved_path": "/mnt/sfs_turbo/hsd/huggingface_datasets/saved_to_disk/",
"delta_type": "compacter",
"do_eval": true,
"do_test": true,
"do_train": true,
"eval_dataset_config_name": [
"en"
],
"eval_dataset_name": "mnli",
"eval_steps": 500,
"evaluation_strategy": "steps",
"greater_is_better": true,
"is_seq2seq": false,
"learning_rate": 0.0003,
"load_best_model_at_end": true,
"max_source_length": 256,
"metric_for_best_model": "average_metrics",
"model_name_or_path": "/mnt/sfs_turbo/hsd/plm_cache/deberta-v2-xlarge",
"num_train_epochs": 3,
"output_dir": "outputs/compacter/deberta-v2-xlarge/mnli",
"overwrite_output_dir": true,
"per_device_eval_batch_size": 32,
"per_device_train_batch_size": 32,
"predict_with_generate": false,
"push_to_dc": true,
"push_to_hub": false,
"save_steps": 500,
"save_strategy": "steps",
"save_total_limit": 1,
"seed": 42,
"split_validation_test": true,
"task_name": "mnli",
"test_dataset_config_name": [
"en"
],
"test_dataset_name": "mnli",
"tokenizer_name": "/mnt/sfs_turbo/hsd/plm_cache/deberta-v2-xlarge",
"unfrozen_modules": [
"deltas",
"layer_norm",
"final_layer_norm"
],
"warmup_steps": 0,
"modified_modules":["attention"]
}

View File

@ -0,0 +1,51 @@
{
"backbone_model": "long-t5",
"dataset_config_name": [
"en"
],
"datasets_load_from_disk": true,
"datasets_saved_path": "/mnt/sfs_turbo/hsd/huggingface_datasets/saved_to_disk/",
"delta_type": "compacter",
"do_eval": true,
"do_test": true,
"do_train": true,
"eval_dataset_config_name": [
"en"
],
"eval_dataset_name": "rte",
"eval_steps": 100,
"evaluation_strategy": "steps",
"greater_is_better": true,
"learning_rate": 0.0003,
"load_best_model_at_end": true,
"max_source_length": 256,
"metric_for_best_model": "average_metrics",
"model_name_or_path": "/mnt/sfs_turbo/hsd/plm_cache/long-t5-tglobal-large",
"model_path_public": "long-t5-tglobal-large",
"num_train_epochs": 20,
"output_dir": "outputs/compacter/long-t5-tglobal-large/rte",
"overwrite_output_dir": true,
"per_device_eval_batch_size": 16,
"per_device_train_batch_size": 16,
"predict_with_generate": true,
"push_to_dc": true,
"push_to_hf": false,
"save_steps": 100,
"save_strategy": "steps",
"save_total_limit": 1,
"seed": 42,
"split_validation_test": true,
"task_name": "rte",
"test_dataset_config_name": [
"en"
],
"test_dataset_name": "rte",
"tokenizer_name": "/mnt/sfs_turbo/hsd/plm_cache/long-t5-tglobal-large",
"unfrozen_modules": [
"deltas",
"layer_norm",
"final_layer_norm"
],
"warmup_steps": 0,
"modified_modules":["attn", "ff", "layer_norm"]
}

View File

@ -2,7 +2,7 @@ import collections
import copy
PATHBASE="/mnt/sfs_turbo/hsd/plm_cache/"
PATHBASE="/home/hushengding/plm_cache/"
# PATHBASE="/home/hushengding/plm_cache/"
AllConfigs = {}

View File

@ -45,11 +45,14 @@ BaseConfigs['t5-base'] = {
"greater_is_better": True,
"evaluation_strategy": "steps",
"overwrite_output_dir": True,
"push_to_hub": False,
"push_to_delta_center": True,
"push_to_hf": False,
"push_to_dc": True,
"save_strategy": "steps",
"datasets_load_from_disk": True,
"datasets_saved_path": "/mnt/sfs_turbo/hsd/huggingface_datasets/saved_to_disk/"
"datasets_saved_path": "/mnt/sfs_turbo/hsd/huggingface_datasets/saved_to_disk/",
"backbone_model": "t5", # use in delta center,
"model_path_public": "t5-base", # use in delta center,
}
AllConfigs['bitfit_t5-base'] = copy.deepcopy(BaseConfigs['t5-base'])
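# Each job config is then the deepcopied base config plus delta-specific
# overrides; a sketch of the pattern (the exact keys here are illustrative):
AllConfigs['bitfit_t5-base'].update({
    "delta_type": "bitfit",
    "output_dir": "outputs/bitfit/t5-base",
})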

View File

@ -0,0 +1,52 @@
{
"backbone_model": "beit",
"dataset_config_name": [
"en"
],
"datasets_load_from_disk": true,
"datasets_saved_path": "/mnt/sfs_turbo/hsd/huggingface_datasets/saved_to_disk",
"delta_type": "lora",
"do_eval": true,
"do_test": true,
"do_train": true,
"eval_dataset_config_name": [
"en"
],
"eval_dataset_name": "cifar10",
"eval_steps": 200,
"evaluation_strategy": "steps",
"greater_is_better": true,
"learning_rate": 0.0003,
"load_best_model_at_end": true,
"max_source_length": 256,
"metric_for_best_model": "average_metrics",
"model_name_or_path": "/mnt/sfs_turbo/hsd/plm_cache/beit-large-patch16-224",
"model_path_public": "beit-large-patch16-224",
"num_classes": 10,
"num_train_epochs": 20,
"output_dir": "outputs/lora/beit-large-patch16-224/cifar10",
"overwrite_output_dir": true,
"per_device_eval_batch_size": 32,
"per_device_train_batch_size": 32,
"predict_with_generate": false,
"push_to_dc": true,
"push_to_hf": false,
"save_steps": 200,
"save_strategy": "steps",
"save_total_limit": 1,
"seed": 42,
"split_validation_test": true,
"task_name": "cifar10",
"test_dataset_config_name": [
"en"
],
"test_dataset_name": "cifar10",
"tokenizer_name": "/mnt/sfs_turbo/hsd/plm_cache/beit-large-patch16-224",
"unfrozen_modules": [
"deltas",
"layer_norm",
"final_layer_norm"
],
"warmup_steps": 0,
"modified_modules":["query","value"]
}

View File

@ -0,0 +1,52 @@
{
"backbone_model": "gpt-j",
"dataset_config_name": [
"en"
],
"datasets_load_from_disk": true,
"datasets_saved_path": "/mnt/sfs_turbo/hsd/huggingface_datasets/saved_to_disk/",
"delta_type": "lora",
"do_eval": true,
"do_test": true,
"do_train": true,
"eval_dataset_config_name": [
"en"
],
"eval_dataset_name": "wikitext",
"eval_steps": 500,
"evaluation_strategy": "steps",
"gradient_accumulation_steps":4,
"greater_is_better": false,
"learning_rate": 0.00003,
"load_best_model_at_end": true,
"max_source_length": 512,
"metric_for_best_model": "average_metrics",
"model_name_or_path": "/mnt/sfs_turbo/hsd/plm_cache/gpt-j-6B",
"model_path_public": "gpt-j-6B",
"num_train_epochs": 2,
"output_dir": "outputs/lora/gpt-j-6B/wikitext",
"overwrite_output_dir": true,
"per_device_eval_batch_size": 2,
"per_device_train_batch_size": 2,
"predict_with_generate": true,
"push_to_dc": true,
"push_to_hf": false,
"save_steps": 500,
"save_strategy": "steps",
"save_total_limit": 1,
"seed": 42,
"split_validation_test": true,
"task_name": "wikitext",
"test_dataset_config_name": [
"en"
],
"test_dataset_name": "wikitext",
"tokenizer_name": "/mnt/sfs_turbo/hsd/plm_cache/gpt-j-6B",
"unfrozen_modules": [
"deltas",
"layer_norm",
"final_layer_norm"
],
"warmup_steps": 0,
"modified_modules":["20.attn.q_proj","21.attn.q_proj","22.attn.q_proj","23.attn.q_proj","24.attn.q_proj","25.attn.q_proj","26.attn.q_proj","27.attn.q_proj"]
}

View File

@ -0,0 +1,52 @@
{
"backbone_model": "roberta-large",
"dataset_config_name": [
"en"
],
"datasets_load_from_disk": true,
"datasets_saved_path": "/mnt/sfs_turbo/hsd/huggingface_datasets/saved_to_disk/",
"delta_type": "lora",
"do_eval": true,
"do_test": true,
"do_train": true,
"eval_dataset_config_name": [
"en"
],
"eval_dataset_name": "superglue-boolq",
"eval_steps": 200,
"evaluation_strategy": "steps",
"greater_is_better": true,
"is_seq2seq": false,
"learning_rate": 0.0001,
"load_best_model_at_end": true,
"max_source_length": 256,
"metric_for_best_model": "average_metrics",
"model_name_or_path": "/mnt/sfs_turbo/hsd/plm_cache/roberta-large",
"model_path_public": "roberta-large",
"num_train_epochs": 20,
"output_dir": "outputs/lora/roberta-large/superglue-boolq",
"overwrite_output_dir": true,
"per_device_eval_batch_size": 32,
"per_device_train_batch_size": 32,
"predict_with_generate": false,
"push_to_hub": false,
"push_to_dc": true,
"save_steps": 200,
"save_strategy": "steps",
"save_total_limit": 1,
"seed": 42,
"split_validation_test": true,
"task_name": "superglue-boolq",
"test_dataset_config_name": [
"en"
],
"test_dataset_name": "superglue-boolq",
"tokenizer_name": "/mnt/sfs_turbo/hsd/plm_cache/roberta-large",
"unfrozen_modules": [
"deltas",
"layer_norm",
"final_layer_norm"
],
"warmup_steps": 0,
"modified_modules":["query","value"]
}

View File

@ -0,0 +1,52 @@
{
"backbone_model": "xlm-roberta-large",
"dataset_config_name": [
"en"
],
"datasets_load_from_disk": true,
"datasets_saved_path": "/mnt/sfs_turbo/hsd/huggingface_datasets/saved_to_disk/",
"delta_type": "lora",
"do_eval": true,
"do_test": true,
"do_train": true,
"eval_dataset_config_name": [
"en"
],
"eval_dataset_name": "superglue-wic",
"eval_steps": 100,
"evaluation_strategy": "steps",
"greater_is_better": true,
"is_seq2seq": false,
"learning_rate": 0.0003,
"load_best_model_at_end": true,
"max_source_length": 256,
"metric_for_best_model": "average_metrics",
"model_name_or_path": "/mnt/sfs_turbo/hsd/plm_cache/xlm-roberta-large",
"model_path_public": "xlm-roberta-large",
"num_train_epochs": 20,
"output_dir": "outputs/lora/xlm-roberta-large/superglue-wic",
"overwrite_output_dir": true,
"per_device_eval_batch_size": 16,
"per_device_train_batch_size": 16,
"predict_with_generate": false,
"push_to_dc": true,
"push_to_hub": false,
"save_steps": 100,
"save_strategy": "steps",
"save_total_limit": 1,
"seed": 42,
"split_validation_test": true,
"task_name": "superglue-wic",
"test_dataset_config_name": [
"en"
],
"test_dataset_name": "superglue-wic",
"tokenizer_name": "/mnt/sfs_turbo/hsd/plm_cache/xlm-roberta-large",
"unfrozen_modules": [
"deltas",
"layer_norm",
"final_layer_norm"
],
"warmup_steps": 0,
"modified_modules":["query","value"]
}

View File

@ -0,0 +1,52 @@
{
"backbone_model": "gpt2",
"dataset_config_name": [
"en"
],
"datasets_load_from_disk": true,
"datasets_saved_path": "/mnt/sfs_turbo/hsd/huggingface_datasets/saved_to_disk/",
"delta_type": "low_rank_adapter",
"do_eval": true,
"do_test": true,
"do_train": true,
"eval_dataset_config_name": [
"en"
],
"eval_dataset_name": "wikitext",
"eval_steps": 200,
"evaluation_strategy": "steps",
"gradient_accumulation_steps":1,
"greater_is_better": false,
"learning_rate": 0.0003,
"load_best_model_at_end": true,
"max_source_length": 768,
"metric_for_best_model": "average_metrics",
"model_name_or_path": "/mnt/sfs_turbo/hsd/plm_cache/gpt2",
"model_path_public": "gpt2",
"num_train_epochs": 2,
"output_dir": "outputs/low_rank_adapter/gpt2/wikitext",
"overwrite_output_dir": true,
"per_device_eval_batch_size": 16,
"per_device_train_batch_size": 16,
"predict_with_generate": true,
"push_to_dc": true,
"push_to_hf": false,
"save_steps": 200,
"save_strategy": "steps",
"save_total_limit": 1,
"seed": 42,
"split_validation_test": true,
"task_name": "wikitext",
"test_dataset_config_name": [
"en"
],
"test_dataset_name": "wikitext",
"tokenizer_name": "/mnt/sfs_turbo/hsd/plm_cache/gpt2",
"unfrozen_modules": [
"deltas",
"layer_norm",
"final_layer_norm"
],
"warmup_steps": 0,
"modified_modules":["attn","mlp"]
}

View File

@ -0,0 +1,51 @@
{
"backbone_model": "bert-large-cased",
"dataset_config_name": [
"en"
],
"datasets_load_from_disk": true,
"datasets_saved_path": "/mnt/sfs_turbo/hsd/huggingface_datasets/saved_to_disk/",
"delta_type": "prefix",
"do_eval": true,
"do_test": true,
"do_train": true,
"eval_dataset_config_name": [
"en"
],
"eval_dataset_name": "rte",
"eval_steps": 100,
"evaluation_strategy": "steps",
"greater_is_better": true,
"is_seq2seq": false,
"learning_rate": 0.0003,
"load_best_model_at_end": true,
"max_source_length": 256,
"metric_for_best_model": "average_metrics",
"model_name_or_path": "/mnt/sfs_turbo/hsd/plm_cache/bert-large-cased",
"num_train_epochs": 20,
"output_dir": "outputs/prefix/bert-large-cased/rte",
"overwrite_output_dir": true,
"per_device_eval_batch_size": 16,
"per_device_train_batch_size": 16,
"predict_with_generate": false,
"push_to_dc": true,
"push_to_hub": false,
"save_steps": 100,
"save_strategy": "steps",
"save_total_limit": 1,
"seed": 42,
"split_validation_test": true,
"task_name": "rte",
"test_dataset_config_name": [
"en"
],
"test_dataset_name": "rte",
"tokenizer_name": "/mnt/sfs_turbo/hsd/plm_cache/bert-large-cased",
"unfrozen_modules": [
"deltas",
"layer_norm",
"final_layer_norm"
],
"warmup_steps": 0,
"modified_modules":["attention"]
}

View File

@ -0,0 +1,51 @@
{
"backbone_model": "bart",
"dataset_config_name": [
"en"
],
"datasets_load_from_disk": true,
"datasets_saved_path": "/mnt/sfs_turbo/hsd/huggingface_datasets/saved_to_disk/",
"delta_type": "soft_prompt",
"do_eval": true,
"do_test": true,
"do_train": true,
"eval_dataset_config_name": [
"en"
],
"eval_dataset_name": "superglue-boolq",
"eval_steps": 500,
"evaluation_strategy": "steps",
"gradient_accumulation_steps":1,
"greater_is_better": true,
"learning_rate": 0.1,
"load_best_model_at_end": true,
"max_source_length": 256,
"metric_for_best_model": "average_metrics",
"model_name_or_path": "/mnt/sfs_turbo/hsd/plm_cache/bart-large",
"model_path_public": "bart-large",
"num_train_epochs": 50,
"output_dir": "outputs/soft_prompt/bart-large/superglue-boolq",
"overwrite_output_dir": true,
"per_device_eval_batch_size": 32,
"per_device_train_batch_size": 32,
"predict_with_generate": true,
"push_to_dc": true,
"push_to_hf": false,
"save_steps": 500,
"save_strategy": "steps",
"save_total_limit": 1,
"seed": 42,
"soft_token_num":100,
"split_validation_test": true,
"task_name": "superglue-boolq",
"test_dataset_config_name": [
"en"
],
"test_dataset_name": "superglue-boolq",
"tokenizer_name": "/mnt/sfs_turbo/hsd/plm_cache/bart-large",
"token_init": true,
"unfrozen_modules": [
"deltas"
],
"warmup_steps": 0
}

View File

@ -93,4 +93,10 @@ class AbstractTask(abc.ABC):
# shuffles the data and samples it.
if n_obs is not None:
dataset = self.subsample(dataset, n_obs)
return dataset.map(self.preprocessor)
this_method = getattr(self.__class__, 'preprocessor')
base_method = getattr(AbstractTask, 'preprocessor')
if this_method is not base_method:
return dataset.map(self.preprocessor)
else:
return dataset
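# A sketch of a subclass that takes the dataset.map branch above by defining its
# own preprocessor (the field names mirror the image tasks' image/label columns):
class MyImageTask(AbstractTask):
    name = "my-image-task"
    def preprocessor(self, example):
        return {"image": example["image"], "labels": example["label"]}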

View File

@ -12,22 +12,16 @@ import logging
import numpy as np
import torch
import re
from openprompt.prompts import ManualTemplate, ManualVerbalizer
from openprompt.plms.utils import TokenizerWrapper
from openprompt.data_utils import InputExample
from openprompt.prompts import GenerationVerbalizer
import itertools
import os
logger = logging.getLogger(__name__)
from transformers.models.auto.tokenization_auto import tokenizer_class_from_name
from typing import List, Dict
from collections import defaultdict
from openprompt.utils import round_list
import warnings
@ -68,7 +62,8 @@ class COLA(AbstractTask):
}
def load_dataset(self, split):
if self.data_args.datasets_load_from_disk:
offline = os.environ.get("HF_DATASETS_OFFLINE", "0")
if offline == '1':
return datasets.load_from_disk(f"{self.data_args.datasets_saved_path}/glue.cola")[split]
else:
return datasets.load_dataset('glue', 'cola',
@ -96,7 +91,8 @@ class SST2(AbstractTask):
}
def load_dataset(self, split):
if self.data_args.datasets_load_from_disk:
offline = os.environ.get("HF_DATASETS_OFFLINE", "0")
if offline == '1':
return datasets.load_from_disk(f"{self.data_args.datasets_saved_path}/glue.sst2")[split]
else:
return datasets.load_dataset('glue', 'sst2',
@ -123,10 +119,9 @@ class MRPC(AbstractTask):
}
def load_dataset(self, split):
if self.data_args.datasets_load_from_disk:
offline = os.environ.get("HF_DATASETS_OFFLINE", "0")
if offline == '1':
return datasets.load_from_disk(f"{self.data_args.datasets_saved_path}/glue.mrpc")[split]
else:
return datasets.load_dataset('glue', 'mrpc', split=split, script_version="master")
@ -152,7 +147,8 @@ class QQP(AbstractTask):
def load_dataset(self, split):
if self.data_args.datasets_load_from_disk:
offline = os.environ.get("HF_DATASETS_OFFLINE", "0")
if offline == '1':
return datasets.load_from_disk(f"{self.data_args.datasets_saved_path}/glue.qqp")[split]
else:
return datasets.load_dataset('glue', 'qqp',
@ -208,7 +204,8 @@ class MNLI(AbstractTask):
}
def load_dataset(self, split):
if self.data_args.datasets_load_from_disk:
offline = os.environ.get("HF_DATASETS_OFFLINE", "0")
if offline == '1':
return datasets.load_from_disk(f"{self.data_args.datasets_saved_path}/glue.mnli")[split]
else:
return datasets.load_dataset('glue', 'mnli', split=split, script_version="master")
@ -243,7 +240,8 @@ class QNLI(AbstractTask):
def load_dataset(self, split):
if self.data_args.datasets_load_from_disk:
offline = os.environ.get("HF_DATASETS_OFFLINE", "0")
if offline == '1':
return datasets.load_from_disk(f"{self.data_args.datasets_saved_path}/glue.qnli")[split]
else:
return datasets.load_dataset('glue', 'qnli', split=split, script_version="master")
@ -279,7 +277,8 @@ class RTE(AbstractTask):
}
def load_dataset(self, split):
if self.data_args.datasets_load_from_disk:
offline = os.environ.get("HF_DATASETS_OFFLINE", "0")
if offline == '1':
return datasets.load_from_disk(f"{self.data_args.datasets_saved_path}/glue.rte")[split]
else:
return datasets.load_dataset('glue', 'rte',
@ -306,7 +305,8 @@ class WNLI(AbstractTask):
def load_dataset(self, split):
if self.data_args.datasets_load_from_disk:
offline = os.environ.get("HF_DATASETS_OFFLINE", "0")
if offline == '1':
return datasets.load_from_disk(f"{self.data_args.datasets_saved_path}/glue.wnli")[split]
else:
return datasets.load_dataset('glue', 'wnli', split=split, script_version="master")
@ -334,7 +334,8 @@ class SuperGLUEBoolQ(AbstractTask):
}
def load_dataset(self, split):
if self.data_args.datasets_load_from_disk:
offline = os.environ.get("HF_DATASETS_OFFLINE", "0")
if offline == '1':
return datasets.load_from_disk(f"{self.data_args.datasets_saved_path}/super_glue.boolq")[split]
else:
return datasets.load_dataset('super_glue', 'boolq', split=split, script_version="master")
@ -347,8 +348,8 @@ class SuperGLUECB(AbstractTask):
split_to_data_split = {"train": "train",
"validation": "validation",
"test": "validation"}
metric = [metrics.mean_multiclass_f1(num_classes=3), metrics.accuracy]
metric_names = ["f1_multiclass", "accuracy"]
metric = [metrics.accuracy]
metric_names = ["accuracy"]
verbalizers = {
"0":{"0": "yes",
@ -361,7 +362,8 @@ class SuperGLUECB(AbstractTask):
}
def load_dataset(self, split):
if self.data_args.datasets_load_from_disk:
offline = os.environ.get("HF_DATASETS_OFFLINE", "0")
if offline == '1':
return datasets.load_from_disk(f"{self.data_args.datasets_saved_path}/super_glue.cb")[split]
else:
return datasets.load_dataset('super_glue', 'cb', split=split, script_version="master")
@ -387,7 +389,8 @@ class SuperGLUECOPA(AbstractTask):
}
def load_dataset(self, split):
if self.data_args.datasets_load_from_disk:
offline = os.environ.get("HF_DATASETS_OFFLINE", "0")
if offline == '1':
return datasets.load_from_disk(f"{self.data_args.datasets_saved_path}/super_glue.copa")[split]
else:
return datasets.load_dataset('super_glue', 'copa', split=split, script_version="master")
@ -416,7 +419,8 @@ class SuperGLUEMultiRC(AbstractTask):
def load_dataset(self, split):
if self.data_args.datasets_load_from_disk:
offline = os.environ.get("HF_DATASETS_OFFLINE", "0")
if offline == '1':
return datasets.load_from_disk(f"{self.data_args.datasets_saved_path}/super_glue.multirc")[split]
else:
return datasets.load_dataset('super_glue', 'multirc', split=split, script_version="master")
@ -459,7 +463,8 @@ class SuperGLUEWIC(AbstractTask):
}
def load_dataset(self, split):
if self.data_args.datasets_load_from_disk:
offline = os.environ.get("HF_DATASETS_OFFLINE", "0")
if offline == '1':
return datasets.load_from_disk(f"{self.data_args.datasets_saved_path}/super_glue.wic")[split]
else:
return datasets.load_dataset('super_glue', 'wic', split=split, script_version="master")
@ -549,13 +554,76 @@ class Beans(AbstractTask):
def load_dataset(self, split):
# from IPython import embed; embed(header="beans")
if self.data_args.datasets_load_from_disk:
return datasets.load_from_disk(f"{self.data_args.datasets_saved_path}/super_glue.wic")[split]
offline = os.environ.get("HF_DATASETS_OFFLINE", "0")
if offline == '1':
return datasets.load_from_disk(f"{self.data_args.datasets_saved_path}/beans")[split]
else:
return datasets.load_dataset('beans', split=split, script_version="master")
class Wikitext(AbstractTask):
#wikitext-2-v1
name = "wikitext"
# labels_list = ['angular_leaf_spot', 'bean_rust', "healthy"]
split_to_data_split = {"train": "train",
"validation": "validation",
"test": "validation"}
metric = [metrics.perplexity]
metric_names = ["perplexity"]
verbalizers = {
"0": {
}
}
templates_text = {
"0": """{"meta":"text"}"""
}
split_valid_to_make_test = True
def load_dataset(self, split):
# from IPython import embed; embed(header="beans")
if self.data_args.datasets_load_from_disk:
return datasets.load_from_disk(f"{self.data_args.datasets_saved_path}/wikitext")[split]
else:
return datasets.load_dataset('wikitext','wikitext-2-v1', split=split, script_version="master")
class Cifar10(AbstractTask):
name = "cifar10"
split_to_data_split = {"train": "train",
"validation": "test",
"test": "test"}
metric = [metrics.accuracy]
metric_names = ["accuracy"]
def load_dataset(self, split):
if self.data_args.datasets_load_from_disk:
d = datasets.load_from_disk(f"{self.data_args.datasets_saved_path}/cifar10")[split].select(range(100))
print(d)
return d
else:
return datasets.load_dataset('cifar10', split=split, script_version="master")
# def preprocessor(self, example):
# example_ = {}
# example_["image"] = example["image"]
# example_["labels"] = example["label"]
# return example_
class Fashion_MNIST(AbstractTask):
name = "Fashion-MNIST"
split_to_data_split = {"train": "train",
"validation": "test",
"test": "test"}
metric = [metrics.accuracy]
metric_names = ["accuracy"]
def load_dataset(self, split):
if self.data_args.datasets_load_from_disk:
d = datasets.load_from_disk(f"{self.data_args.datasets_saved_path}/fashion_mnist")[split]
print(d)
return d
else:
return datasets.load_dataset('fashion_mnist', split=split, script_version="master")
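# The offline check repeated in each load_dataset above could be factored into a
# single helper; a sketch (argument names follow the usage in this file):
def load_split(data_args, disk_key, hf_args, split):
    if os.environ.get("HF_DATASETS_OFFLINE", "0") == "1":
        return datasets.load_from_disk(f"{data_args.datasets_saved_path}/{disk_key}")[split]
    return datasets.load_dataset(*hf_args, split=split, script_version="master")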
TASK_MAPPING = OrderedDict(
[
@ -575,7 +643,10 @@ TASK_MAPPING = OrderedDict(
('superglue-multirc', SuperGLUEMultiRC),
('superglue-wic', SuperGLUEWIC),
# ('superglue-record', SuperGLUERecord)
('beans', Beans)
('beans', Beans),
('wikitext',Wikitext),
('cifar10',Cifar10),
('fashion_mnist',Fashion_MNIST)
]
)

View File

@ -11,6 +11,14 @@ import sklearn.metrics
logger = getLogger(__name__)
def perplexity(outputs, targets,ignore_index=-100):
"""Computes the perplexity accuracy."""
ce = -np.log(outputs).mean()
# ce = F.cross_entropy(torch.Tensor(outputs).view(-1, outputs.shape[-1]), torch.Tensor(targets).view(-1).long(),ignore_index=ignore_index)
return {"perplexity":float(np.exp(ce))}
def accuracy(predictions, targets) -> dict:
"""Computes the average accuracy."""
return {"accuracy": 100 * ((np.array(predictions) == np.array(targets)).mean())}
@ -47,20 +55,20 @@ def spearman_corrcoef(predictions, targets) -> dict:
def spearman_corrcoef(predictions, targets) -> dict:
"""Computes Spearman correlation coefficient."""
# TODO: we need to do postprocessors in a clean way for each dataset.
from examples_seq2seq.data_processors.postprocessors import string_to_float
targets = [string_to_float(target) for target in targets]
predictions= [string_to_float(prediction) for prediction in predictions]
spearman_corrcoef = 100 * scipy.stats.spearmanr(targets, predictions)[0]
# def spearman_corrcoef(predictions, targets) -> dict:
# """Computes Spearman correlation coefficient."""
# # TODO: we need to do postprocessors in a clean way for each dataset.
# from examples_seq2seq.data_processors.postprocessors import string_to_float
# targets = [string_to_float(target) for target in targets]
# predictions= [string_to_float(prediction) for prediction in predictions]
# spearman_corrcoef = 100 * scipy.stats.spearmanr(targets, predictions)[0]
# Note that if all the predictions are the same, spearman
# correlation is nan; to guard against this, we check the output
# and return 0 in this case.
if math.isnan(spearman_corrcoef):
spearman_corrcoef = 0
return {"spearmanr": spearman_corrcoef}
# # Note that if all the predictions are the same, spearman
# # correlation is nan; to guard against this, we check the output
# # and return 0 in this case.
# if math.isnan(spearman_corrcoef):
# spearman_corrcoef = 0
# return {"spearmanr": spearman_corrcoef}
def f1_score_with_invalid(predictions, targets) -> dict:
@ -102,8 +110,8 @@ def f1_score(predictions, targets) -> dict:
Returns:
F1 score, where any prediction != 0 or 1 is counted as wrong.
"""
targets = targets.astype(np.int32)
predictions = predictions.astype(np.int32)
targets = np.array(targets).astype(np.int32)
predictions = np.array(predictions).astype(np.int32)
return {"f1": 100 * sklearn.metrics.f1_score(targets, predictions)}
# TODO: maybe gaurd against invalid values https://stackoverflow.com/questions/56865344/how-do-i-calculate-the-matthews-correlation-coefficient-in-tensorflow

View File

@ -26,10 +26,12 @@ You can also adapt this script on your own tasks.
import os
import sys
os.environ['MKL_THREADING_LAYER'] = 'GNU'
os.environ['MKL_SERVICE_FORCE_INTEL'] = '1'
os.environ["TOKENIZERS_PARALLELISM"] = "false"
sys.path.append(os.path.join(os.getcwd(), "../"))
# sys.path.append(os.path.join(os.getcwd(), "/mnt/sfs_turbo/zhangzhen/OpenDelta"))
sys.path.append(os.path.join(os.getcwd()))
import functools
@ -56,7 +58,7 @@ from transformers.trainer_utils import is_main_process, get_last_checkpoint
from data_processors import AutoTask #, #TaskDataCollatorForSeq2Seq, AutoPostProcessor, data_collator
from utils import read_json, save_json
from utils.args import ModelArguments, TrainingArguments, DataTrainingArguments, RemainArgHfArgumentParser
from utils.args import ModelArguments, TrainingArguments, DataTrainingArguments, DeltaArguments, RemainArgHfArgumentParser
logger = logging.getLogger(__name__)
@ -66,16 +68,14 @@ def main():
# See all possible arguments in src/transformers/training_args.py
# or by passing the --help flag to this script.
# We now keep distinct sets of args, for a cleaner separation of concerns.
parser = RemainArgHfArgumentParser((ModelArguments, DataTrainingArguments, TrainingArguments))
if len(sys.argv) == 2 and sys.argv[1].endswith(".json"):
# If we pass only one argument to the script and it's the path to a json file,
# let's parse it to get our arguments.
model_args, data_args, training_args, delta_args = parser.parse_json_file(json_file=os.path.abspath(sys.argv[1]))
else:
model_args, data_args, training_args, delta_args = parser.parse_args_into_dataclasses(return_remaining_strings=True)
parser = RemainArgHfArgumentParser((ModelArguments, DataTrainingArguments, TrainingArguments, DeltaArguments))
# You can provide a json file which contains the arguments, and use --argument some_arg to override or append to the json file.
json_file, cmd_args = (os.path.abspath(sys.argv[1]), sys.argv[2:]) if sys.argv[1].endswith(".json") else (None, sys.argv[1:])
model_args, data_args, training_args, delta_args, remain_args = parser.parse_json_file_with_cmd_args(json_file=json_file, command_line_args=cmd_args)
logger.warning("The following arguments not used! {}".format(remain_args))
print(f"{training_args.output_dir}/results.json")
logger.info(f"The results will be used in {training_args.output_dir}/results.json")
# exit()
# Detecting last checkpoint.
last_checkpoint = None
@ -121,7 +121,8 @@ def main():
if os.path.basename(model_args.model_name_or_path).startswith("t5"):
if os.path.basename(model_args.model_name_or_path).startswith("t5") \
or os.path.basename(model_args.model_name_or_path).startswith("long-t5") :
from examples_prompt.backbones.t5 import get_backbone, preprocess_function, mask_token_func, get_remove_columns, get_prompts
from examples_prompt.backbones.t5 import Trainer, DataCollator
elif os.path.basename(model_args.model_name_or_path).startswith("blenderbot"):
@ -129,7 +130,9 @@ def main():
from examples_prompt.backbones.blenderbot import Trainer, DataCollator
elif os.path.basename(model_args.model_name_or_path).startswith("roberta") \
or os.path.basename(model_args.model_name_or_path).startswith("bert") \
or os.path.basename(model_args.model_name_or_path).startswith("albert") :
or os.path.basename(model_args.model_name_or_path).startswith("albert") \
or os.path.basename(model_args.model_name_or_path).startswith("xlm-roberta") \
or os.path.basename(model_args.model_name_or_path).startswith("deberta") :
from examples_prompt.backbones.bert import get_backbone, preprocess_function, mask_token_func, get_remove_columns, get_prompts
from examples_prompt.backbones.bert import Trainer, DataCollator
elif os.path.basename(model_args.model_name_or_path).startswith("beit"):
@ -144,6 +147,10 @@ def main():
elif os.path.basename(model_args.model_name_or_path).startswith("clip"):
from examples_prompt.backbones.clip import get_backbone, preprocess_function, mask_token_func, get_remove_columns, get_prompts
from examples_prompt.backbones.clip import Trainer, DataCollator
elif os.path.basename(model_args.model_name_or_path).startswith("opt") \
or os.path.basename(model_args.model_name_or_path).startswith("gpt"):
from examples_prompt.backbones.opt import get_backbone, preprocess_function, mask_token_func, get_remove_columns, get_prompts
from examples_prompt.backbones.opt import Trainer, DataCollator
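The dispatch above keys on the basename of ``model_name_or_path``, so a local checkpoint directory is handled the same way as a hub id as long as its final path component starts with the model family name. A tiny illustration (the path is hypothetical):

```python
import os

model_name_or_path = "/data/ckpts/t5-base"   # hypothetical local checkpoint
name = os.path.basename(model_name_or_path)  # "t5-base"
assert name.startswith("t5")                 # selects examples_prompt.backbones.t5
```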
@ -161,7 +168,8 @@ def main():
if delta_args.delta_type.lower() != "none":
from opendelta import AutoDeltaConfig,AutoDeltaModel
delta_config = AutoDeltaConfig.from_dict(vars(delta_args))
from dataclasses import asdict
delta_config = AutoDeltaConfig.from_dict(asdict(delta_args))
delta_model = AutoDeltaModel.from_config(delta_config, backbone_model=model)
delta_model.freeze_module(set_state_dict = True)
delta_model.log(delta_ratio=True, trainable_ratio=True, visualization=True)
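The switch from ``vars`` to ``asdict`` matters here: after ``DeltaArguments.merge_arguments`` (defined in ``utils/args.py`` below) rebuilds the dataclass, the delta-type-specific fields live as class-level defaults rather than instance attributes, so ``vars()`` would miss them while ``asdict()`` reads them through ``dataclasses.fields``. A self-contained sketch of the difference (class names are illustrative):

```python
import dataclasses
from dataclasses import dataclass, asdict

@dataclass
class Base:
    delta_type: str = "adapter"

obj = Base()
# Rebuild the instance's class with an extra field whose value becomes a
# class-level default, mimicking what merge_arguments does.
obj.__class__ = dataclasses.make_dataclass(
    "Merged", fields=[("bottleneck_dim", int, 24)], bases=(Base,))

print(vars(obj))    # {'delta_type': 'adapter'} -- the merged field is missing
print(asdict(obj))  # {'delta_type': 'adapter', 'bottleneck_dim': 24}
```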
@ -278,14 +286,9 @@ def main():
if torch.cuda.is_available() and training_args.compute_memory:
peak_memory = (torch.cuda.max_memory_allocated() / 1024 ** 2)/1000
print(
"Memory utilization",
peak_memory,
"GB"
)
performance_metrics.update({"peak_memory": peak_memory})
if training_args.compute_memory or training_args.compute_time:
print("Efficiency Statistics {}".format(performance_metrics))
logger.info("Efficiency Statistics {}".format(performance_metrics))
trainer.save_metrics("performance", performance_metrics)
# Evaluation
@ -313,17 +316,30 @@ def main():
trainer.save_metrics(f"{data_args.task_name}_test", metrics)
all_results['test'][data_args.task_name] = metrics
# from opendelta.utils.delta_hub import create_hub_repo_name
# from opendelta.utils.delta_center import create_delta_center_args, create_repo_name
# repo_name = create_hub_repo_name(root="DeltaHub",
# dataset=data_args.task_name,
# delta_type = delta_args.delta_type,
# model_name_or_path= model_args.model_name_or_path)
# results['repo_name'] = repo_name
# if delta_args.delta_type.lower() != "none":
# if training_args.push_to_hub: # TODO add description here
# delta_model.save_finetuned(push_to_hub=True, save_directory=repo_name, use_auth_token=True)
# # trainer.push_to_hub(**kwargs)
# else:
# delta_model.save_finetuned(push_to_hub=False, save_directory=repo_name, use_auth_token=True)
# center_args =
# repo_name = create_repo_name(prefix="", center_args=center_args)
# all_results['repo_name'] = repo_name
delta_model.save_finetuned(finetuned_delta_path=delta_args.finetuned_delta_path,
push_to_dc=training_args.push_to_dc,
center_args={"test_performance":all_results['test'][data_args.task_name]['test_average_metrics'],
},
center_args_pool = {**vars(model_args), **vars(data_args), **vars(training_args), **vars(delta_args)},
list_tags = ['NLI'],
dict_tags = {'purpose':'for testing'},
delay_push=True,
test_result=all_results['test']
)
with open(f"{training_args.output_dir}/results.json", 'w') as fout:

View File

@ -0,0 +1,344 @@
# coding=utf-8
# Copyright OpenDelta Team and THUNLP lab. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
A unified running script for most models to perform downstream tasks in a
prompt learning fashion, i.e., with no classification head: all tasks are cast
to mask prediction or span prediction tasks.
Processing relevant to the different backbone models is stored in ../backbones/
A few lines are added to integrate the delta tuning methods.
You can also adapt this script to your own tasks.
"""
import os
import sys
os.environ['MKL_THREADING_LAYER'] = 'GNU'
os.environ['MKL_SERVICE_FORCE_INTEL'] = '1'
os.environ["TOKENIZERS_PARALLELISM"] = "false"
sys.path.append(os.path.join(os.getcwd(), "../"))
sys.path.append(os.path.join(os.getcwd()))
import functools
import logging
import torch
import json
import numpy as np
import transformers
from transformers import (
AutoConfig,
AutoModelForMaskedLM,
AutoModelForSeq2SeqLM,
AutoTokenizer,
DataCollatorForSeq2Seq,
# HfArgumentParser,
# MBartTokenizer,
# default_data_collator,
Trainer,
Seq2SeqTrainer,
set_seed,
)
from transformers.trainer_utils import is_main_process, get_last_checkpoint
from data_processors import AutoTask #, #TaskDataCollatorForSeq2Seq, AutoPostProcessor, data_collator
from utils import read_json, save_json
from utils.args import ModelArguments, TrainingArguments, DataTrainingArguments, RemainArgHfArgumentParser, DeltaArguments
logger = logging.getLogger(__name__)
def main():
parser = RemainArgHfArgumentParser((ModelArguments, DataTrainingArguments, TrainingArguments, DeltaArguments))
# You can provide a json file that contains the arguments and use --argument some_arg on the command line to override or append to the json file.
json_file, cmd_args = (os.path.abspath(sys.argv[1]), sys.argv[2:]) if sys.argv[1].endswith(".json") else (None, sys.argv[1:])
model_args, data_args, training_args, delta_args, remain_args = parser.parse_json_file_with_cmd_args(json_file=json_file, command_line_args=cmd_args)
logger.warning("The following arguments not used! {}".format(remain_args))
# # exit()
# # Detecting last checkpoint.
# last_checkpoint = None
# if os.path.isdir(training_args.output_dir) and training_args.do_train and not training_args.overwrite_output_dir:
# last_checkpoint = get_last_checkpoint(training_args.output_dir)
# print("#### last_checkpoint ", last_checkpoint)
# if last_checkpoint is None and len(os.listdir(training_args.output_dir)) > 0:
# '''
# raise ValueError(
# f"Output directory ({training_args.output_dir}) already exists and is not empty. "
# "Use --overwrite_output_dir to overcome."
# )
# '''
# pass
# elif last_checkpoint is not None:
# logger.info(
# f"Checkpoint detected, resuming training at {last_checkpoint}. To avoid this behavior, change "
# "the `--output_dir` or add `--overwrite_output_dir` to train from scratch."
# )
# Setup logging
logging.basicConfig(
format="%(asctime)s - %(levelname)s - %(name)s - %(message)s",
datefmt="%m/%d/%Y %H:%M:%S",
handlers=[logging.StreamHandler(sys.stdout)],
)
logger.setLevel(logging.INFO if is_main_process(training_args.local_rank) else logging.WARN)
# Log on each process the small summary:
logger.warning(
f"Process rank: {training_args.local_rank}, device: {training_args.device}, n_gpu: {training_args.n_gpu}"
+ f"distributed training: {bool(training_args.local_rank != -1)}, 16-bits training: {training_args.fp16}"
)
# Set the verbosity to info of the Transformers logger (on main process only):
if is_main_process(training_args.local_rank):
transformers.utils.logging.set_verbosity_info()
# logger.info("Training/evaluation parameters %s", training_args, model_args, data_args, delta_args)
logger.info("{}\n{}\n{}\n{}".format(training_args, model_args, data_args, delta_args))
# Set seed before initializing model.
set_seed(training_args.seed)
if os.path.basename(model_args.model_name_or_path).startswith("t5"):
from examples_prompt.backbones.t5 import get_backbone, preprocess_function, mask_token_func, get_remove_columns, get_prompts
from examples_prompt.backbones.t5 import Trainer, DataCollator
elif os.path.basename(model_args.model_name_or_path).startswith("blenderbot"):
from examples_prompt.backbones.blenderbot import get_backbone, preprocess_function, mask_token_func, get_remove_columns, get_prompts
from examples_prompt.backbones.blenderbot import Trainer, DataCollator
elif os.path.basename(model_args.model_name_or_path).startswith("roberta") \
or os.path.basename(model_args.model_name_or_path).startswith("bert") \
or os.path.basename(model_args.model_name_or_path).startswith("albert") :
from examples_prompt.backbones.bert import get_backbone, preprocess_function, mask_token_func, get_remove_columns, get_prompts
from examples_prompt.backbones.bert import Trainer, DataCollator
elif os.path.basename(model_args.model_name_or_path).startswith("beit"):
from examples_prompt.backbones.beit import get_backbone, preprocess_function, mask_token_func, get_remove_columns, get_prompts
from examples_prompt.backbones.beit import Trainer, DataCollator
elif os.path.basename(model_args.model_name_or_path).startswith("bart"):
from examples_prompt.backbones.bart import get_backbone, preprocess_function, mask_token_func, get_remove_columns, get_prompts
from examples_prompt.backbones.bart import Trainer, DataCollator
elif os.path.basename(model_args.model_name_or_path).startswith("bigbird"):
from examples_prompt.backbones.bigbird import get_backbone, preprocess_function, mask_token_func, get_remove_columns, get_prompts
from examples_prompt.backbones.bigbird import Trainer, DataCollator
elif os.path.basename(model_args.model_name_or_path).startswith("clip"):
from examples_prompt.backbones.clip import get_backbone, preprocess_function, mask_token_func, get_remove_columns, get_prompts
from examples_prompt.backbones.clip import Trainer, DataCollator
config, tokenizer, model = get_backbone(model_args=model_args)
# model parallelize
if hasattr(training_args, "model_parallel") and training_args.model_parallel:
logger.info('parallelize model!')
model.parallelize()
from opendelta import Visualization
Visualization(model).structure_graph()
if delta_args.delta_type.lower() != "none":
from opendelta.delta_models.adapter import AdapterConfig, AdapterModel
delta_config = AdapterConfig.from_finetuned(finetuned_delta_path=delta_args.finetuned_delta_path)
delta_model = AdapterModel.from_finetuned(finetuned_delta_path=delta_args.finetuned_delta_path,
delta_config=delta_config,
backbone_model=model,
force_download=delta_args.force_download,
cache_dir=delta_args.delta_cache_dir)
# delta_model.freeze_module(set_state_dict = True)
delta_model.log(delta_ratio=True, trainable_ratio=True, visualization=True)
performance_metrics = {}
non_empty_splits_names = []
# if training_args.do_train:
# non_empty_splits_names.append("train")
# if training_args.do_eval:
# non_empty_splits_names.append("eval")
if training_args.do_test:
non_empty_splits_names.append("test")
splits = {}
for split_name in ['test']:
if split_name not in non_empty_splits_names:
splits[split_name] = None
continue
task = AutoTask.get(data_args.task_name,
data_args.dataset_config_name,
data_args=data_args,
seed=data_args.data_sample_seed)
dataset = task.get(split=split_name,
split_validation_test=training_args.split_validation_test,
n_obs=data_args.max_train_samples)
template, _verbalizer, tokenizer_wrapper = get_prompts(task, tokenizer, data_args)
dataset = dataset.map(
functools.partial(preprocess_function,
data_args=data_args,
tokenizer=tokenizer,
template=template,
verbalizer=_verbalizer,
tokenizer_wrapper=tokenizer_wrapper,
split=split_name),
batched=False,
num_proc=data_args.preprocessing_num_workers,
remove_columns=get_remove_columns(list(dataset.features.keys())),
load_from_cache_file=not data_args.overwrite_cache,
)
# from IPython import embed; embed()
splits[split_name] = dataset
if split_name == "test":
eval_task = task
verbalizer = _verbalizer
trainer = Trainer(
model=model,
verbalizer=verbalizer,
eval_task=eval_task,
args=training_args,
# train_dataset=splits['train'],
# eval_dataset=splits['eval'],
tokenizer=tokenizer,
data_collator=DataCollator(tokenizer),
)
def save_training_config(config_file, output_dir):
json_data = read_json(config_file)
save_json(os.path.join(output_dir, "training_config.json"), json_data)
# Saves training config.
if trainer.is_world_process_zero():
save_training_config(sys.argv[1], training_args.output_dir)
# # Training
# if training_args.do_train:
# checkpoint = None
# if training_args.resume_from_checkpoint is not None:
# checkpoint = training_args.resume_from_checkpoint
# elif last_checkpoint is not None:
# checkpoint = last_checkpoint
# if training_args.compute_time:
# torch.cuda.synchronize() # wait for move to complete
# start = torch.cuda.Event(enable_timing=True)
# end = torch.cuda.Event(enable_timing=True)
# start.record()
# train_result = trainer.train(resume_from_checkpoint=checkpoint)
# if training_args.compute_time:
# end.record()
# torch.cuda.synchronize() # wait for all_reduce to complete
# total_time = start.elapsed_time(end)/(1000*60)
# performance_metrics.update({"total_time in minutes ": total_time})
# trainer.save_model() # Saves the tokenizer too for easy upload
# train_metrics = train_result.metrics
# max_train_samples = (
# data_args.max_train_samples if data_args.max_train_samples is not None else len(splits['train'])
# )
# train_metrics["train_samples"] = min(max_train_samples, len(splits['train']))
# trainer.log_metrics("train", train_metrics)
# trainer.save_metrics("train", train_metrics)
# trainer.save_state()
# if torch.cuda.is_available() and training_args.compute_memory:
# peak_memory = (torch.cuda.max_memory_allocated() / 1024 ** 2)/1000
# print(
# "Memory utilization",
# peak_memory,
# "GB"
# )
# performance_metrics.update({"peak_memory": peak_memory})
# if training_args.compute_memory or training_args.compute_time:
# print("Efficiency Statistics {}".format(performance_metrics))
# trainer.save_metrics("performance", performance_metrics)
# Evaluation
all_results = {}
# all_results['evaluate'] = {}
# if training_args.do_eval:
# logger.info("*** Evaluate ***")
# metrics = trainer.evaluate(eval_dataset=splits['eval'],
# )
# trainer.log_metrics(f"{data_args.task_name}_eval", metrics)
# trainer.save_metrics(f"{data_args.task_name}_eval", metrics)
# all_results['evaluate'][data_args.task_name] = metrics
# Test
all_results['test'] = {}
if training_args.do_test:
logger.info("*** Test ***")
metrics = trainer.evaluate(eval_dataset=splits['test'],
metric_key_prefix="test"
)
trainer.log_metrics(f"{data_args.task_name}_test", metrics)
trainer.save_metrics(f"{data_args.task_name}_test", metrics)
all_results['test'][data_args.task_name] = metrics
# from opendelta.utils.delta_hub import create_hub_repo_name
# from opendelta.utils.delta_center import create_delta_center_args, create_repo_name
# repo_name = create_hub_repo_name(root="DeltaHub",
# dataset=data_args.task_name,
# delta_type = delta_args.delta_type,
# model_name_or_path= model_args.model_name_or_path)
# center_args =
# repo_name = create_repo_name(prefix="", center_args=center_args)
# all_results['repo_name'] = repo_name
# delta_model.save_finetuned(push_to_hf=training_args.push_to_hf,
# push_to_dc=training_args.push_to_dc,
# center_args={},
# center_args_pool = {**vars(model_args), **vars(data_args), **vars(training_args), **vars(delta_args)},
# delay_push=True,
# )
print(all_results)
# with open(f"{training_args.output_dir}/results.json", 'w') as fout:
# string = json.dumps(all_results, indent=4,sort_keys=True)
# fout.write(string+"\n")
return all_results
if __name__ == "__main__":
result = main()

View File

@ -1,6 +1,10 @@
from dataclasses import dataclass, field
from typing import Optional, List
from transformers import HfArgumentParser
from pathlib import Path
import sys
@dataclass
class ModelArguments:
@ -81,6 +85,10 @@ class TrainingArguments(HfTrainingArguments):
remove_unused_columns: Optional[bool] = field(
default=False, metadata={"help": "Remove columns not required by the model when using an nlp.Dataset."}
)
push_to_hf: Optional[bool] = field(default=False, metadata={"help": "Push the model to huggingface model hub."})
push_to_dc: Optional[bool] = field(default=True, metadata={"help": "Push the model to delta center."})
@ -211,28 +219,254 @@ class DataTrainingArguments:
self.test_max_target_length = self.max_target_length
import dataclasses
@dataclass
class DeltaArguments:
"""
Arguments pertaining to the delta tuning method and how its checkpoints are loaded and pushed.
"""
delta_type: str= field(default="", metadata={"help": "the type of delta"})
backbone_model: Optional[str] = field(
default="", metadata={"help": "the backbone model"}
)
model_path_public: Optional[str] = field(
default="", metadata={"help": "the path (url) of the publicly available backbone model"}
)
modified_modules: Optional[List[str]] = field(
default_factory=lambda: None, metadata={"help": "the modules inside the backbone to be modified"}
)
unfrozen_modules: Optional[List[str]] = field(
default_factory=lambda:["deltas"], metadata={"help": "the modules inside the backbone or in the delta modules that need to be unfrozen"}
)
finetuned_delta_path: Optional[str] = field(
default=None, metadata={"help": "the path of the finetuned delta model"}
)
force_download: Optional[bool] = field(
default=False, metadata={"help": "whether to download the checkpoint form delta center no matter whether it exists"}
)
local_files_only: Optional[bool] = field(
default=False, metadata={"help": "whether not to look for file in delta center"}
)
delta_cache_dir: Optional[str] = field(
default=None, metadata={"help": "The cache path defined by user. If not set, we will firstly look into the"+
" working directory and then into the default cache path (ususally ~/.cache/delta_center)."}
)
delay_push: Optional[bool] = field(
default=True, metadata={
'help':'whether to push the checkpoint to delta center later.'
}
)
def merge_arguments(self, objb):
print(objb)
self.__class__ = dataclasses.make_dataclass('DeltaArgument', fields=[(s.name, s.type, getattr(objb, s.name)) for s in dataclasses.fields(objb)], bases=(DeltaArguments,))
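For intuition, a hedged usage sketch of ``merge_arguments`` with the ``AdapterArguments`` dataclass defined just below: the delta-type-specific fields become readable on the original instance because its class is rebuilt with them as defaults.

```python
# Assumes the dataclasses defined in this module are importable.
delta_args = DeltaArguments(delta_type="adapter")
delta_args.merge_arguments(AdapterArguments(bottleneck_dim=32))
print(delta_args.bottleneck_dim)  # 32, folded in from AdapterArguments
print(type(delta_args).__name__)  # 'DeltaArgument' (the rebuilt class)
```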
@dataclass
class AdapterArguments:
bottleneck_dim: Optional[int] = field(
default=24, metadata={"help": "the dimension of the bottleneck layer"}
)
@dataclass
class LoRAArguments:
lora_r: Optional[int] = field(
default=8, metadata={"help": "the rank of the LoRA matrices."}
)
@dataclass
class PrefixArguments:
pass
@dataclass
class BitFitArguments:
pass
@dataclass
class SoftPromptArguments:
soft_token_num: Optional[int] = field(
default=100, metadata={"help": "the number of soft tokens."}
)
@dataclass
class CompacterArguments:
pass
@dataclass
class LowRankAdapterArguments:
pass
# from opendelta.delta_models.adapter import AdapterConfig
# from opendelta.delta_models.bitfit import BitFitConfig
# from opendelta.delta_models.compacter import CompacterConfig
# from opendelta.delta_models.lora import LoraArguments
# from opendelta.delta_models.low_rank_adapter import LowRankAdapterConfig
# from opendelta.delta_models.prefix import PrefixConfig
# from opendelta.delta_models.soft_prompt import SoftPromptConfig
# DELTAARGMAP = {
# "adapter": AdapterConfig,
# "lora":LoraArguments,
# "prefix":PrefixConfig,
# "bitfit":BitFitConfig,
# "soft_prompt":SoftPromptConfig,
# "compacter":CompacterConfig,
# "low_rank_adapter":LowRankAdapterConfig
# }
DELTAARGMAP = {
"adapter": AdapterArguments,
"lora":LoRAArguments,
"prefix":PrefixArguments,
"bitfit":BitFitArguments,
"soft_prompt":SoftPromptArguments,
"compacter":CompacterArguments,
"low_rank_adapter":LowRankAdapterArguments
}
# TODO: add more specific delta arguments
class RemainArgHfArgumentParser(HfArgumentParser):
def parse_json_file(self, json_file: str, return_remaining_args=True ):
'''This is a more powerful version of the HuggingFace argument parser.
It can receive both command line arguments and json file arguments, and
the command line arguments will override the json file arguments.
The parser loads the delta-type-specific arguments (e.g., the Adapter's)
according to the delta_type argument and merges them with the common
delta arguments.
'''
def parse_json_file_with_cmd_args(self, json_file: str, command_line_args=None, return_remaining_args=True ):
"""
Helper method that loads arguments from a json file, converts them to command-line style
arguments, merges in real command line arguments (which take precedence), and populates the
dataclass types.
"""
import argparse
import json
from pathlib import Path
import dataclasses
data = json.loads(Path(json_file).read_text())
data_str = ""
if command_line_args is None:
command_line_args = []
for key in data:
if "--"+key not in command_line_args:
if isinstance(data[key], list):
data_str += "--"+key
for elem in data[key]:
data_str+=" "+ str(elem)
data_str += " "
else:
data_str+= "--" + key + " " + str(data[key]) + " "
data_list = data_str.split()
data_list += command_line_args
if return_remaining_args:
outputs, remain_args = self.parse_args_into_dataclasses(args=data_list, return_remaining_strings=return_remaining_args)
for d in outputs:
if isinstance(d, DeltaArguments): # merge the specific delta arguments
d.merge_arguments(outputs[-1])
return [*(outputs[:-1]), remain_args]
else:
outputs = self.parse_args_into_dataclasses(args=data_list, return_remaining_strings=return_remaining_args)
for d in outputs:
if isinstance(d, DeltaArguments):
d.merge_arguments(outputs[-1])
return [*(outputs[:-1]),]
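To make the json-to-argv conversion above concrete, here is a hedged trace with toy values (the keys are illustrative). A flag that already appears on the command line suppresses its json copy, so the command line wins:

```python
data = {"learning_rate": 3e-4, "modified_modules": ["attn", "ff"]}
command_line_args = ["--learning_rate", "1e-4"]

data_str = ""
for key in data:
    if "--" + key not in command_line_args:
        if isinstance(data[key], list):
            data_str += "--" + key
            for elem in data[key]:
                data_str += " " + str(elem)
            data_str += " "
        else:
            data_str += "--" + key + " " + str(data[key]) + " "

data_list = data_str.split() + command_line_args
print(data_list)  # ['--modified_modules', 'attn', 'ff', '--learning_rate', '1e-4']
```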
def parse_args_into_dataclasses(
self, args=None, return_remaining_strings=False, look_for_args_file=True, args_filename=None
):
"""
Parse command-line args into instances of the specified dataclass types.
This relies on argparse's `ArgumentParser.parse_known_args`. See the doc at:
docs.python.org/3.7/library/argparse.html#argparse.ArgumentParser.parse_args
Args:
args:
List of strings to parse. The default is taken from sys.argv. (same as argparse.ArgumentParser)
return_remaining_strings:
If true, also return a list of remaining argument strings.
look_for_args_file:
If true, will look for a ".args" file with the same base name as the entry point script for this
process, and will append its potential content to the command line args.
args_filename:
If not None, will uses this file instead of the ".args" file specified in the previous argument.
Returns:
Tuple consisting of:
- the dataclass instances in the same order as they were passed to the initializer.
- if applicable, an additional namespace for more (non-dataclass backed) arguments added to the parser
after initialization.
- The potential list of remaining argument strings. (same as argparse.ArgumentParser.parse_known_args)
"""
if args_filename or (look_for_args_file and len(sys.argv)):
if args_filename:
args_file = Path(args_filename)
else:
args_file = Path(sys.argv[0]).with_suffix(".args")
if args_file.exists():
fargs = args_file.read_text().split()
args = fargs + args if args is not None else fargs + sys.argv[1:]
# in case of duplicate arguments the first one has precedence
# so we append rather than prepend.
namespace, remaining_args = self.parse_known_args(args=args)
# conditionally add delta arguments
deltatype_args = DELTAARGMAP[namespace.delta_type]
self.dataclass_types.append(deltatype_args)
self._add_dataclass_arguments(deltatype_args)
# parse the arguments again, this time with the specific delta type's arguments
namespace, remaining_args = self.parse_known_args(args=args)
outputs = []
for dtype in self.dataclass_types:
keys = {f.name for f in dataclasses.fields(dtype) if f.init}
inputs = {k: v for k, v in vars(namespace).items() if k in keys}
for k in keys:
delattr(namespace, k)
obj = dtype(**inputs)
outputs.append(obj)
# Dead leftovers from parse_json_file: `data` and `return_remaining_args`
# are not defined in this method, so these lines are disabled.
# remain_args = argparse.ArgumentParser()
# remain_args.__dict__.update(data)
# if return_remaining_args:
#     return (*outputs, remain_args)
if len(namespace.__dict__) > 0:
# additional namespace.
outputs.append(namespace)
if return_remaining_strings:
return (outputs, remaining_args)
else:
return (*outputs,)
if remaining_args:
raise ValueError(f"Some specified arguments are not used by the HfArgumentParser: {remaining_args}")
return outputs
# namespace, remaining_args = self.parse_known_args(args=data_list)
# print("Here", command_line_args, data_list,namespace, remaining_args)
# data.update(remain_args)
# outputs = []
# for dtype in self.dataclass_types:
# keys = {f.name for f in dataclasses.fields(dtype) if f.init}
# inputs = {k: namespace.get(k) for k in list(data.keys()) if k in keys}
# obj = dtype(**inputs)
# outputs.append(obj)
# # remain_args = argparse.ArgumentParser()
# remain_args.__dict__.update(remain_args)
# if return_remaining_args:
# return (*outputs, remain_args)
# else:
# return (*outputs,)
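Putting the pieces together, a hedged end-to-end sketch of how ``src/run.py`` drives this parser; the json path is hypothetical and is assumed to contain a ``delta_type`` key so the matching delta-specific dataclass can be added on the second parsing pass:

```python
parser = RemainArgHfArgumentParser((ModelArguments, DataTrainingArguments,
                                    TrainingArguments, DeltaArguments))
model_args, data_args, training_args, delta_args, remain_args = \
    parser.parse_json_file_with_cmd_args(
        json_file="configs/adapter_t5-base/superglue-cb.json",  # hypothetical
        command_line_args=["--bottleneck_dim", "32"])           # overrides the json
print(delta_args.bottleneck_dim)  # 32, merged in from AdapterArguments
```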

View File

@ -1,48 +0,0 @@
"""Install Compacter."""
import os
import setuptools
from torch.utils.cpp_extension import BuildExtension, CUDAExtension
#os.environ['TORCH_CUDA_ARCH_LIST']="3.5;3.7;6.1;7.0;7.5;8.6+PTX"
def setup_package():
long_description = "examples_prompt"
setuptools.setup(
name='examples_prompt',
version='0.0.1',
description='textual prompt example',
long_description=long_description,
long_description_content_type='text/markdown',
author='Shengding Hu',
license='MIT License',
packages=setuptools.find_packages(
exclude=['docs', 'tests', 'scripts']),
dependency_links=[
'https://download.pytorch.org/whl/torch_stable.html',
],
classifiers=[
'Intended Audience :: Developers',
'Intended Audience :: Science/Research',
'License :: OSI Approved :: MIT License',
'Topic :: Scientific/Engineering :: Artificial Intelligence',
'Programming Language :: Python :: 3',
'Programming Language :: Python :: 3.7.10',
],
keywords='text nlp machinelearning',
# ext_modules=[
# CUDAExtension('seq2seq.projections.fwh_cuda',
# sources=[
# 'seq2seq/projections/fwh_cuda/fwh_cpp.cpp',
# 'seq2seq/projections/fwh_cuda/fwh_cu.cu',
# ]
# )
# ]
# ,
cmdclass={"build_ext": BuildExtension},
install_requires=[
],
)
if __name__ == '__main__':
setup_package()

View File

@ -1,5 +1,5 @@
__version__ = "0.1.0"
__version__ = "0.2.4"
class GlobalSetting:
def __init__(self):

View File

@ -2,16 +2,14 @@ from copy import deepcopy
from typing import Any, Dict, OrderedDict
from opendelta.utils.visualization import Visualization
import torch.nn as nn
from transformers.file_utils import PushToHubMixin
from opendelta.utils.logging import get_logger
import importlib
from opendelta.delta_configs import BaseDeltaConfig
from opendelta.basemodel import DeltaBase
logger = get_logger(__name__)
DELTA_CONFIG_MAPPING = {
"lora": "LoraConfig",
"lora": "LoraConfig",
"low_rank_adapter": "LowRankAdapterConfig",
"bitfit": "BitFitConfig",
"adapter":"AdapterConfig",
@ -91,18 +89,18 @@ class AutoDeltaConfig:
"AutoConfig is designed to be instantiated "
"using the ``AutoConfig.from_pretrained(pretrained_model_name_or_path)`` method."
)
@classmethod
def from_dict(cls, config_dict: Dict[str, Any], **kwargs):
r""" Instantiate a DeltaConfig according to the dict. Automatically load the config specified by
r""" Instantiate a DeltaConfig according to the dict. Automatically load the config specified by
:obj:`delta_type`.
Args:
config_dict (:obj:`dict`): The dict of configs of delta model.
kwargs: Other keyword arguments passed to initialize the config.
>>> config = AutoDeltaConfig.from_dict({"delta_type":"lora"}) # This will load the dault lora config.
>>> config = AutoDeltaConfig.from_dict({"delta_type":"lora", "lora_r":5}) # Will load the default lora config, with lora_r = 5
>>> config = AutoDeltaConfig.from_dict({"delta_type":"lora", "lora_r":5}) # Will load the default lora config, with lora_r = 5
"""
config_dict = deepcopy(config_dict)
@ -114,7 +112,7 @@ class AutoDeltaConfig:
@classmethod
def from_finetuned(cls, finetuned_model_name_or_path, **kwargs):
def from_finetuned(cls, finetuned_delta_path, **kwargs):
r"""
Instantiate one of the configuration classes of the library from a finetuned delta model configuration.
The configuration class to instantiate is selected based on the ``delta_type`` property of the config object that
@ -122,18 +120,18 @@ class AutoDeltaConfig:
Parameters:
finetuned_model_name_or_path (:obj:`str` or :obj:`os.PathLike`, *optional*):
finetuned_delta_path (:obj:`str` or :obj:`os.PathLike`, *optional*):
Can be either:
- A string, the *model id* of a finetuned delta model configuration hosted inside a model repo on
huggingface.co. Valid model ids can be located at the root-level, like ``Davin/lora``, or
namespaced under a user or organization name, like ``DeltaHub/lora_t5-base_mrpc``.
- A path to a *directory* containing a configuration file saved using the
:py:meth:`DeltaBase.save_finetuned` method,
e.g., ``./my_model_directory/``.
- A path or url to a saved configuration JSON *file*, e.g.,
``./my_model_directory/configuration.json``.
The last two options are not tested but are inherited from huggingface.
cache_dir (:obj:`str` or :obj:`os.PathLike`, *optional*):
Path to a directory in which a downloaded pretrained model configuration should be cached if the
standard cache should not be used.
@ -163,9 +161,9 @@ class AutoDeltaConfig:
The values in kwargs of any keys which are configuration attributes will be used to override the loaded
values. Behavior concerning key/value pairs whose keys are *not* configuration attributes is controlled
by the ``return_unused_kwargs`` keyword parameter.
Examples:
.. code-block:: python
from transformers import AutoConfig
@ -173,25 +171,24 @@ class AutoDeltaConfig:
"""
kwargs["name_or_path"] = finetuned_model_name_or_path
config_dict, _ = BaseDeltaConfig.get_config_dict(finetuned_model_name_or_path, **kwargs)
config_dict, kwargs = BaseDeltaConfig.get_config_dict(finetuned_delta_path, **kwargs)
if "delta_type" in config_dict:
config_class = LAZY_CONFIG_MAPPING[config_dict["delta_type"]]
return config_class.from_dict(config_dict, **kwargs)
else:
# Fallback: use pattern matching on the string.
for pattern, config_class in LAZY_CONFIG_MAPPING.items():
if pattern in str(finetuned_model_name_or_path):
if pattern in str(finetuned_delta_path):
return config_class.from_dict(config_dict, **kwargs)
raise ValueError(
f"Unrecognized model in {finetuned_model_name_or_path}. "
f"Unrecognized model in {finetuned_delta_path}. "
f"Should have a `delta_type` key in the loaded config, or contain one of the following strings "
f"in its name: {', '.join(LAZY_CONFIG_MAPPING.keys())}"
)
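As a hedged illustration of the pattern fallback just above: a checkpoint whose path contains a known delta type resolves even when the saved config lacks a ``delta_type`` key (the directory name is hypothetical):

```python
from opendelta import AutoDeltaConfig

# "lora" appears in the path, so the fallback resolves to LoraConfig.
delta_config = AutoDeltaConfig.from_finetuned("./ckpts/lora_t5-base_mrpc")
```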
### AutoModels below
class _LazyAutoMapping(OrderedDict):
"""
@ -323,20 +320,20 @@ class AutoDeltaModel:
f"using the `{self.__class__.__name__}.from_pretrained(pretrained_model_name_or_path)` or "
f"`{self.__class__.__name__}.from_config(config)` methods."
)
@classmethod
def from_config(cls, config, backbone_model, **kwargs): #-> "DeltaBase":
r"""Automatically instantiates a delta model based on the :obj:`config`. The delta model correspond to the delta
:obj:`config` will be loaded and initialized using the arguments in :obj:`config`.
:obj:`config` will be loaded and initialized using the arguments in :obj:`config`.
.. note::
Using the :meth:`from_config` method alone will not load the finetuned weight file (e.g., pytorch_model.bin).
Please use :meth:`from_finetuned` for that.
Args:
config (:obj:`BaseDeltaConfig`):
backbone_model (:obj:`nn.Module`):
Examples:
.. code-block:: python
@ -355,53 +352,54 @@ class AutoDeltaModel:
)
@classmethod
def from_finetuned(cls, finetuned_model_name_or_path, backbone_model, *model_args, **kwargs):
r""" Automatically instantiated a delta model and load the finetuned checkpoints based on the
:obj:`finetuned_model_name_or_path`, which can either be a string pointing to a local path or a url pointint to
the delta hub. It will check the hash after loading the delta model to see whether the correct backbone and
delta checkpoint are used.
def from_finetuned(cls, finetuned_delta_path, backbone_model, *model_args, **kwargs):
r""" Automatically instantiates a delta model and loads the finetuned checkpoint based on the
:obj:`finetuned_delta_path`, which can either be a string pointing to a local path or a url pointing to
the delta hub. It will check the hash after loading the delta model to see whether the correct backbone and
delta checkpoint are used.
Args:
finetuned_model_name_or_path (:obj:`str` or :obj:`os.PathLike`, *optional*):
finetuned_delta_path (:obj:`str` or :obj:`os.PathLike`, *optional*):
Can be either:
- A string, the *model id* of a finetuned delta model configuration hosted inside a model repo on
huggingface.co. Valid model ids can be located at the root-level, like ``Davin/lora``, or
namespaced under a user or organization name, like ``DeltaHub/lora_t5-base_mrpc``.
- A path to a *directory* containing a configuration file saved using the
:py:meth:`DeltaBase.save_finetuned` method,
e.g., ``./my_model_directory/``.
- A path or url to a saved configuration JSON *file*, e.g.,
``./my_model_directory/configuration.json``.
The last two options are not tested but are inherited from huggingface.
backbone_model (:obj:`nn.Module`): The backbone model to be modified.
model_args: Other arguments for initializing the model.
kwargs: Other kwargs that will be passed into DeltaBase.from_finetuned.
Example:
.. code-block:: python
delta_model = AutoDeltaModel.from_finetuned("DeltaHub/lora_t5-base-mrpc", backbone_model)
"""
config = kwargs.pop("config", None)
delta_config = kwargs.pop("delta_config", None)
if not isinstance(config, BaseDeltaConfig):
config, kwargs = AutoDeltaConfig.from_finetuned(
finetuned_model_name_or_path, return_unused_kwargs=True, **kwargs
if not isinstance(delta_config, BaseDeltaConfig):
delta_config, kwargs = AutoDeltaConfig.from_finetuned(
finetuned_delta_path, return_unused_kwargs=True, **kwargs
)
if type(config) in cls._delta_model_mapping.keys():
model_class = cls._delta_model_mapping[type(config)]
return model_class.from_finetuned(finetuned_model_name_or_path, backbone_model, *model_args, **kwargs)
if type(delta_config) in cls._delta_model_mapping.keys():
model_class = cls._delta_model_mapping[type(delta_config)]
return model_class.from_finetuned(finetuned_delta_path, backbone_model, *model_args, delta_config=delta_config, **kwargs)
raise ValueError(
f"Unrecognized configuration class {delta_config.__class__} for this kind of AutoModel: {cls.__name__}.\n"
f"Model type should be one of {', '.join(c.__name__ for c in cls._delta_model_mapping.keys())}."
)
if __name__ == "__main__":

View File

@ -671,21 +671,46 @@ class DeltaBase(nn.Module, SaveLoadMixin):
if visualization:
from opendelta import Visualization
Visualization(module).structure_graph()
self.get_statistics(module)
if trainable_ratio:
n_trainable = self.num_trainable_parameters(module)
n_total = self.num_total_parameters(module)
logger.info("Trainable Ratio: {:2f}%".format(n_trainable/n_total*100))
logger.info("Trainable Ratio: {:2f}%".format(self.stat['trainable_ratio']*100))
if delta_ratio:
n_delta = self.num_delta_parameters(module)
n_total = self.num_total_parameters(module)
logger.info("Delta Parameter Ratio: {:2f}%".format(n_delta/n_total*100))
logger.info("Delta Parameter Ratio: {:2f}%".format(self.stat['delta_ratio']*100))
if cuda_memory:
cudamem = 0
maxcudamem = 0
for device_id in range(torch.cuda.device_count()):
cudamem += torch.cuda.memory_allocated(f"cuda:{device_id}")/1024**3
maxcudamem += torch.cuda.max_memory_allocated(f"cuda:{device_id}")/1024**3
logger.info("Static Memory {:.2f} GB, Max Memory {:.2f} GB".format(cudamem, maxcudamem))
logger.info("Static Memory {:.2f} GB, Max Memory {:.2f} GB".format(self.stat['cudamem'], self.stat['maxcudamem']))
def get_statistics(self, module=None):
r"""Get the statistics of the parameters in the delta modules.
Args:
module (:obj:`nn.Module`, *optional*): The module to compute the statistics.
Returns:
:obj:`dict`: The statistics of the parameters in the delta modules.
"""
if module is None:
module = self.backbone_model
self.stat = {}
n_trainable = self.num_trainable_parameters(module)
n_total = self.num_total_parameters(module)
self.stat['trainable_ratio'] = n_trainable/n_total
n_delta = self.num_delta_parameters(module)
n_total = self.num_total_parameters(module)
self.stat['delta_ratio'] = n_delta/n_total
cudamem = 0
maxcudamem = 0
for device_id in range(torch.cuda.device_count()):
cudamem += torch.cuda.memory_allocated(f"cuda:{device_id}")/1024**3
maxcudamem += torch.cuda.max_memory_allocated(f"cuda:{device_id}")/1024**3
self.stat['cudamem'] = cudamem
self.stat['maxcudamem'] = maxcudamem
return self.stat
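A hedged usage sketch of the new statistics path, assuming a locally available ``bert-base-uncased`` backbone: ``log()`` now reads its ratios from ``self.stat``, which ``get_statistics()`` populates.

```python
from transformers import AutoModelForMaskedLM
from opendelta import AutoDeltaConfig, AutoDeltaModel

backbone = AutoModelForMaskedLM.from_pretrained("bert-base-uncased")
delta_config = AutoDeltaConfig.from_dict({"delta_type": "lora", "lora_r": 8})
delta_model = AutoDeltaModel.from_config(delta_config, backbone_model=backbone)
delta_model.freeze_module(set_state_dict=True)

delta_model.log(delta_ratio=True, trainable_ratio=True, visualization=False)
print(delta_model.stat["trainable_ratio"], delta_model.stat["delta_ratio"])
```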

Some files were not shown because too many files have changed in this diff.