move must_try to unittest, unitest to examples

shengdinghu 2022-10-16 13:46:45 +00:00
parent 62f03f0068
commit 85d21ab038
6 changed files with 1 additions and 385 deletions

View File

@@ -83,7 +83,7 @@ pip install -r requirements.txt && python setup.py develop
```
## Must Try
The following codes and comments walk you through the key functionality of OpenDelta.
The following code and comments walk you through the key functionality of OpenDelta. It can also be found in [must_try.py](https://github.com/thunlp/OpenDelta/tree/main/unittest/must_try.py)
```python
# use transformers as usual.
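# What follows is a hedged sketch of the rest of the walkthrough, reconstructed
# from the unit-test code deleted later in this commit rather than copied from
# must_try.py itself; the model name and save path are illustrative.
from transformers import AutoModel
backbone = AutoModel.from_pretrained("bert-base-cased")

# Inspect the backbone structure, then attach a delta module (LoRA here).
from opendelta import Visualization, AutoDeltaConfig, AutoDeltaModel
Visualization(backbone).structure_graph()
delta_config = AutoDeltaConfig.from_dict({"delta_type": "lora"})
delta_model = AutoDeltaModel.from_config(delta_config, backbone_model=backbone)
Visualization(backbone).structure_graph()

# Freeze everything except the delta parameters and save only the delta weights.
delta_model.freeze_module(exclude=["deltas"])
delta_model.save_finetuned("./delta_ckpt")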

View File

@@ -1,200 +0,0 @@
import time
import random
import torch
import bmtrain as bmt
import numpy as np
import os
import csv

from model_center import get_args
from model_center.model import CPM2
from model_center.tokenizer import CPM2Tokenizer
from model_center.dataset.cpm2dataset import DATASET
from model_center.utils import print_inspect
from model_center.dataset import DistributedDataLoader


def get_tokenizer(args):
    tokenizer = CPM2Tokenizer.from_pretrained(args.model_config)
    return tokenizer


def get_model(args):
    model = CPM2.from_pretrained(args.model_config)
    return model


def get_optimizer(args, model):
    optimizer = bmt.optim.AdamOffloadOptimizer(model.parameters(), weight_decay=args.weight_decay)
    return optimizer


def get_learning_rate_scheduler(args, optimizer):
    if args.lr_decay_iters is None:
        args.lr_decay_iters = args.train_iters * args.epochs
    if args.lr_decay_style == "noam":
        lr_scheduler = bmt.lr_scheduler.Noam(optimizer,
                                             start_lr = args.lr,
                                             warmup_iter = args.warmup_iters,
                                             end_iter = args.lr_decay_iters,
                                             num_iter = args.start_step)
    elif args.lr_decay_style == "constant":
        lr_scheduler = bmt.lr_scheduler.NoDecay(optimizer,
                                                start_lr = args.lr,
                                                warmup_iter = args.warmup_iters,
                                                end_iter = -1,
                                                num_iter = args.start_step)
    elif args.lr_decay_style == "linear":
        lr_scheduler = bmt.lr_scheduler.Linear(optimizer,
                                               start_lr = args.lr,
                                               warmup_iter = args.warmup_iters,
                                               end_iter = args.lr_decay_iters,
                                               num_iter = args.start_step)
    elif args.lr_decay_style == "exponential":
        lr_scheduler = bmt.lr_scheduler.Exponential(optimizer,
                                                    start_lr = args.lr,
                                                    warmup_iter = args.warmup_iters,
                                                    end_iter = args.lr_decay_iters,
                                                    num_iter = args.start_step)
    elif args.lr_decay_style == "cosine":
        lr_scheduler = bmt.lr_scheduler.Cosine(optimizer,
                                               start_lr = args.lr,
                                               warmup_iter = args.warmup_iters,
                                               end_iter = args.lr_decay_iters,
                                               num_iter = args.start_step)
    else:
        raise ValueError(f"lr_scheduler of type {args.lr_decay_style} is not supported yet.")
    return lr_scheduler


def setup_model_and_optimizer(args):
    # get the tokenizer
    tokenizer = get_tokenizer(args)
    # get the model
    model = get_model(args)
    bmt.synchronize()
    # get the optimizer and lr_scheduler
    optimizer = get_optimizer(args, model)
    lr_scheduler = get_learning_rate_scheduler(args, optimizer)
    bmt.synchronize()
    # get the memory usage
    bmt.print_rank("Model mem\n", torch.cuda.memory_summary())
    bmt.synchronize()
    return tokenizer, model, optimizer, lr_scheduler


def initialize():
    # get arguments
    args = get_args()
    # init bmt
    bmt.init_distributed(seed = args.seed)
    # init save folder
    if args.save != None:
        os.makedirs(args.save, exist_ok=True)
    return args


def prepare_dataset(args, tokenizer, base_path, dataset_name, rank, world_size):
    splits = ['train', 'dev', 'test']
    dataset = {}
    for split in splits:
        dataset[split] = DATASET[dataset_name](base_path, split, rank, world_size, tokenizer, args.max_encoder_length, args.max_decoder_length)
    verbalizer = torch.LongTensor(DATASET[dataset_name].get_verbalizer(tokenizer)).cuda()
    return dataset, verbalizer


def finetune(args, tokenizer, model, optimizer, lr_scheduler, dataset, verbalizer):
    loss_func = bmt.loss.FusedCrossEntropy(ignore_index=-100)

    optim_manager = bmt.optim.OptimManager(loss_scale=args.loss_scale)
    optim_manager.add_optimizer(optimizer, lr_scheduler)

    dataloader = {
        "train": DistributedDataLoader(dataset['train'], batch_size=args.batch_size, shuffle=True),
        "dev": DistributedDataLoader(dataset['dev'], batch_size=args.batch_size, shuffle=False),
        "test": DistributedDataLoader(dataset['test'], batch_size=args.batch_size, shuffle=False),
    }

    for epoch in range(5):
        model.train()
        for it, data in enumerate(dataloader['train']):
            enc_input = data["enc_input"]
            enc_length = data["enc_length"]
            dec_input = data["dec_input"]
            dec_length = data["dec_length"]
            targets = data["targets"]
            index = data["index"]

            logits = model(enc_input, enc_length, dec_input, dec_length)
            logits = logits.index_select(dim=-1, index=verbalizer)
            logits = logits[torch.where(index==1)]

            loss = loss_func(logits, targets)
            global_loss = bmt.sum_loss(loss).item()

            optim_manager.zero_grad()
            optim_manager.backward(loss)
            grad_norm = optim_manager.clip_grad_norm(optimizer.param_groups, args.clip_grad, norm_type = 2)
            optim_manager.step()

            bmt.print_rank(
                "train | epoch {:3d} | Iter: {:6d}/{:6d} | loss: {:.4f} | lr: {:.4e}, scale: {:10.4f} | grad_norm: {:.4f} |".format(
                    epoch,
                    it,
                    len(dataloader["train"]),
                    global_loss,
                    lr_scheduler.current_lr,
                    int(optim_manager.loss_scale),
                    grad_norm,
                )
            )
            # if it % args.inspect_iters == 0: print_inspect(model, "*")
            # if args.save != None and it % args.save_iters == 0:
            #     bmt.save(model, os.path.join(args.save, args.save_name+("-%d.pt" % it)))

        model.eval()
        with torch.no_grad():
            acc = 0
            total = 0
            for it, data in enumerate(dataloader['dev']):
                enc_input = data["enc_input"]
                enc_length = data["enc_length"]
                dec_input = data["dec_input"]
                dec_length = data["dec_length"]
                targets = data["targets"]
                index = data["index"]

                logits = model(enc_input, enc_length, dec_input, dec_length)
                logits = logits.index_select(dim=-1, index=verbalizer)
                logits = logits[torch.where(index==1)]
                logits = logits.argmax(dim=-1)

                acc += torch.sum(logits == targets).item()
                total += logits.shape[0]
                bmt.print_rank(
                    "dev | epoch {:3d} | Iter: {:6d}/{:6d} | acc: {:6d} | total: {:6d} |".format(
                        epoch,
                        it,
                        len(dataloader["dev"]),
                        acc,
                        total,
                    )
                )
            acc = torch.tensor(acc / total).cuda()
            acc = bmt.sum_loss(acc).cpu().item()
            bmt.print_rank(f"dev epoch {epoch}: accuracy: {acc}")


def main():
    args = initialize()
    tokenizer, model, optimizer, lr_scheduler = setup_model_and_optimizer(args)
    dataset, verbalizer = prepare_dataset(
        args,
        tokenizer,
        f"{args.base_path}/down_data/paraphrase",
        args.dataset_name,
        bmt.rank(), bmt.world_size(),
    )
    finetune(args, tokenizer, model, optimizer, lr_scheduler, dataset, verbalizer)


if __name__ == "__main__":
    main()
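The deleted example above fine-tunes all of CPM2's parameters. For reference, here is a minimal sketch (not part of the deleted file) of how the OpenDelta API exercised in the unit test further below could wrap such a backbone so that only delta parameters stay trainable; treating model_center's CPM2 as a supported OpenDelta backbone, and the helper name `attach_delta`, are assumptions.

```python
# Hedged sketch: attach a LoRA delta to the backbone returned by get_model()
# and freeze everything else before building the optimizer.
# Assumption: the CPM2 backbone from model_center is accepted by OpenDelta.
from opendelta import AutoDeltaConfig, AutoDeltaModel


def attach_delta(model, delta_type="lora"):
    delta_config = AutoDeltaConfig.from_dict({"delta_type": delta_type})
    delta_model = AutoDeltaModel.from_config(delta_config, backbone_model=model)
    delta_model.freeze_module(exclude=["deltas"])  # only delta parameters keep requires_grad
    return model  # the backbone is modified in place
```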

View File

@@ -1,2 +0,0 @@

View File

@@ -1,182 +0,0 @@
# Adapted from Tevatron (https://github.com/texttron/tevatron)
from argparse import ArgumentParser
import logging
import os
import sys
import torch.nn as nn

logger = logging.getLogger(__name__)


class UnitTest:
    def __init__(self, models):
        self.models = models

        # Delta configurations covered by the tests, selected via --config_id.
        self.Configs = {}
        self.Configs[0] = {
            "delta_type": "lora",
        }
        self.Configs[1] = {
            "delta_type": "bitfit",
        }
        self.Configs[2] = {
            "delta_type": "adapter",
        }
        self.Configs[3] = {
            "delta_type": "compacter",
        }
        self.Configs[4] = {
            "delta_type": "prefix",
        }
        self.Configs[5] = {
            "delta_type": "soft_prompt",
        }
        self.Configs[6] = {
            "delta_type": "low_rank_adapter",
        }

    def get_delta_config(self, config_id):
        return self.Configs[config_id]

    def unitTest0(self, delta_config_dict):
        # Attach a delta to a single backbone and visualize the module
        # structure before and after the modification.
        model = self.models[0]
        from opendelta import Visualization
        Visualization(model).structure_graph()

        from opendelta import AutoDeltaConfig, AutoDeltaModel
        delta_config = AutoDeltaConfig.from_dict(delta_config_dict)
        delta_model = AutoDeltaModel.from_config(delta_config, backbone_model=model)

        from opendelta import Visualization
        Visualization(model).structure_graph()

    def unitTest1(self, delta_config_dict):
        # Wrap two backbones in a custom module, attach a delta, save the
        # delta checkpoint, then reload it onto a fresh wrapper.
        class Mymodel(nn.Module):
            def __init__(self, a, b):
                super().__init__()
                self.a = a
                self.b = b

        model = Mymodel(self.models[0], self.models[1])
        from opendelta import Visualization
        Visualization(model).structure_graph()

        from opendelta import AutoDeltaConfig, AutoDeltaModel
        delta_config = AutoDeltaConfig.from_dict(delta_config_dict)
        delta_model = AutoDeltaModel.from_config(delta_config, backbone_model=model)

        from opendelta import Visualization
        Visualization(model).structure_graph()

        delta_model.save_finetuned("./tmp")
        delta_model.freeze_module(exclude=['deltas'])
        delta_model.save_finetuned("./tmp")

        model = Mymodel(self.models[0], self.models[1])
        Visualization(model).structure_graph()
        delta_model = AutoDeltaModel.from_finetuned("./tmp", backbone_model=model)
        Visualization(model).structure_graph()

    def unit_test(self, test_id, config_id):
        delta_config_dict = self.Configs[config_id]
        if test_id == 0:
            self.unitTest0(delta_config_dict)
        elif test_id == 1:
            self.unitTest1(delta_config_dict)


from dataclasses import dataclass, field


@dataclass
class UnitTestArguments:
    """
    Arguments pertaining to which model/config/tokenizer we are going to fine-tune from.
    """
    config_id: int = field(
        default=0,
    )
    test_id: int = field(
        default=0,
    )
    model_name_or_path: str = field(
        default='bert-base-cased',
        metadata={"help": "tested: bert-base-cased, roberta-base, rinna/japanese-gpt2-small, t5-small, facebook/opt-125m"}
    )


from transformers import HfArgumentParser, TrainingArguments, AutoModel, GPT2Model


def main():
    parser = HfArgumentParser((TrainingArguments, UnitTestArguments))
    if len(sys.argv) == 2 and sys.argv[1].endswith(".json"):
        training_args, unit_test_args = parser.parse_json_file(json_file=os.path.abspath(sys.argv[1]))
    else:
        training_args, unit_test_args = parser.parse_args_into_dataclasses()
    training_args: TrainingArguments

    if (
        os.path.exists(training_args.output_dir)
        and os.listdir(training_args.output_dir)
        and training_args.do_train
        and not training_args.overwrite_output_dir
    ):
        raise ValueError(
            f"Output directory ({training_args.output_dir}) already exists and is not empty. Use --overwrite_output_dir to overcome."
        )

    # Setup logging
    logging.basicConfig(
        format="%(asctime)s - %(levelname)s - %(name)s - %(message)s",
        datefmt="%m/%d/%Y %H:%M:%S",
        level=logging.INFO if training_args.local_rank in [-1, 0] else logging.WARN,
    )
    logger.warning(
        "Process rank: %s, device: %s, n_gpu: %s, distributed training: %s, 16-bits training: %s",
        training_args.local_rank,
        training_args.device,
        training_args.n_gpu,
        bool(training_args.local_rank != -1),
        training_args.fp16,
    )
    logger.info("Training/evaluation parameters %s", training_args)

    # Load the backbone twice so the two-backbone test gets independent copies.
    model = AutoModel.from_pretrained(unit_test_args.model_name_or_path)
    import torch
    import copy
    models = [model, copy.deepcopy(model)]

    unit_test = UnitTest(models)
    unit_test.unit_test(unit_test_args.test_id, unit_test_args.config_id)


if __name__ == "__main__":
    main()
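For reference, a hypothetical way to drive the script above (the file name `unit_test.py` is an assumption; `--output_dir` is required by `TrainingArguments`, and `--test_id 1 --config_id 0` exercises the two-backbone LoRA save/reload case):

```python
# Hypothetical invocation sketch; the script file name is assumed.
import subprocess

subprocess.run(
    [
        "python", "unit_test.py",
        "--output_dir", "./unittest_out",          # required by TrainingArguments
        "--model_name_or_path", "bert-base-cased",
        "--test_id", "1",                          # two-backbone save/reload test
        "--config_id", "0",                        # LoRA
    ],
    check=True,
)
```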