From 85d21ab0382cfb570bbc7f32fc1ab5f8c8ceba89 Mon Sep 17 00:00:00 2001
From: shengdinghu
Date: Sun, 16 Oct 2022 13:46:45 +0000
Subject: [PATCH] move must_try to unittest, unittest to examples

---
 README.md                       |   2 +-
 unittest/test_accelerate.py     |   0
 unittest/test_bmtrain.py        | 200 --------------------------------
 unittest/test_half_precision.py |   0
 unittest/test_main.py           |   2 -
 unittest/user_defined.py        | 182 -----------------------------
 6 files changed, 1 insertion(+), 385 deletions(-)
 delete mode 100644 unittest/test_accelerate.py
 delete mode 100644 unittest/test_bmtrain.py
 delete mode 100644 unittest/test_half_precision.py
 delete mode 100644 unittest/test_main.py
 delete mode 100644 unittest/user_defined.py

diff --git a/README.md b/README.md
index f212972..443ea60 100644
--- a/README.md
+++ b/README.md
@@ -83,7 +83,7 @@ pip install -r requirements.txt && python setup.py develop
 ```
 
 ## Must Try
-The following codes and comments walk you through the key functionality of OpenDelta.
+The following code and comments walk you through the key functionality of OpenDelta. The same walkthrough is also available in [must_try.py](https://github.com/thunlp/OpenDelta/tree/main/unittest/must_try.py).
 
 ```python
 # use transformers as usual.
diff --git a/unittest/test_accelerate.py b/unittest/test_accelerate.py
deleted file mode 100644
index e69de29..0000000
diff --git a/unittest/test_bmtrain.py b/unittest/test_bmtrain.py
deleted file mode 100644
index 5d843e7..0000000
--- a/unittest/test_bmtrain.py
+++ /dev/null
@@ -1,200 +0,0 @@
-
-
-
-
-import time
-import random
-import torch
-import bmtrain as bmt
-import numpy as np
-import os
-import csv
-
-from model_center import get_args
-from model_center.model import CPM2
-from model_center.tokenizer import CPM2Tokenizer
-from model_center.dataset.cpm2dataset import DATASET
-from model_center.utils import print_inspect
-from model_center.dataset import DistributedDataLoader
-
-def get_tokenizer(args):
-    tokenizer = CPM2Tokenizer.from_pretrained(args.model_config)
-    return tokenizer
-
-def get_model(args):
-    model = CPM2.from_pretrained(args.model_config)
-    return model
-
-def get_optimizer(args, model):
-    optimizer = bmt.optim.AdamOffloadOptimizer(model.parameters(), weight_decay=args.weight_decay)
-    return optimizer
-
-def get_learning_rate_scheduler(args, optimizer):
-    if args.lr_decay_iters is None:
-        args.lr_decay_iters = args.train_iters * args.epochs
-    if args.lr_decay_style == "noam":
-        lr_scheduler = bmt.lr_scheduler.Noam(optimizer,
-                                             start_lr = args.lr,
-                                             warmup_iter = args.warmup_iters,
-                                             end_iter = args.lr_decay_iters,
-                                             num_iter = args.start_step)
-    elif args.lr_decay_style == "constant":
-        lr_scheduler = bmt.lr_scheduler.NoDecay(optimizer,
-                                                start_lr = args.lr,
-                                                warmup_iter = args.warmup_iters,
-                                                end_iter = -1,
-                                                num_iter = args.start_step)
-    elif args.lr_decay_style == "linear":
-        lr_scheduler = bmt.lr_scheduler.Linear(optimizer,
-                                               start_lr = args.lr,
-                                               warmup_iter = args.warmup_iters,
-                                               end_iter = args.lr_decay_iters,
-                                               num_iter = args.start_step)
-    elif args.lr_decay_style == "exponential":
-        lr_scheduler = bmt.lr_scheduler.Exponential(optimizer,
-                                                    start_lr = args.lr,
-                                                    warmup_iter = args.warmup_iters,
-                                                    end_iter = args.lr_decay_iters,
-                                                    num_iter = args.start_step)
-    elif args.lr_decay_style == "cosine":
-        lr_scheduler = bmt.lr_scheduler.Cosine(optimizer,
-                                               start_lr = args.lr,
-                                               warmup_iter = args.warmup_iters,
-                                               end_iter = args.lr_decay_iters,
-                                               num_iter = args.start_step)
-    else:
-        raise ValueError(f"lr_scheduler of type {args.lr_decay_style} is not supported yet.")
-
-    return lr_scheduler
-
-def setup_model_and_optimizer(args):
-    # get the tokenizer
-    tokenizer = get_tokenizer(args)
-    # get the model
-    model = get_model(args)
-    bmt.synchronize()
-    # get the optimizer and lr_scheduler
-    optimizer = get_optimizer(args, model)
-    lr_scheduler = get_learning_rate_scheduler(args, optimizer)
-    bmt.synchronize()
-    # get the memory usage
-    bmt.print_rank("Model mem\n", torch.cuda.memory_summary())
-    bmt.synchronize()
-    return tokenizer, model, optimizer, lr_scheduler
-
-def initialize():
-    # get arguments
-    args = get_args()
-    # init bmt
-    bmt.init_distributed(seed = args.seed)
-    # init save folder
-    if args.save != None:
-        os.makedirs(args.save, exist_ok=True)
-    return args
-
-def prepare_dataset(args, tokenizer, base_path, dataset_name, rank, world_size):
-    splits = ['train', 'dev', 'test']
-    dataset = {}
-    for split in splits:
-        dataset[split] = DATASET[dataset_name](base_path, split, rank, world_size, tokenizer, args.max_encoder_length, args.max_decoder_length)
-    verbalizer = torch.LongTensor(DATASET[dataset_name].get_verbalizer(tokenizer)).cuda()
-    return dataset, verbalizer
-
-
-def finetune(args, tokenizer, model, optimizer, lr_scheduler, dataset, verbalizer):
-    loss_func = bmt.loss.FusedCrossEntropy(ignore_index=-100)
-
-    optim_manager = bmt.optim.OptimManager(loss_scale=args.loss_scale)
-    optim_manager.add_optimizer(optimizer, lr_scheduler)
-
-    dataloader = {
-        "train": DistributedDataLoader(dataset['train'], batch_size=args.batch_size, shuffle=True),
-        "dev": DistributedDataLoader(dataset['dev'], batch_size=args.batch_size, shuffle=False),
-        "test": DistributedDataLoader(dataset['test'], batch_size=args.batch_size, shuffle=False),
-    }
-
-    for epoch in range(5):
-        model.train()
-        for it, data in enumerate(dataloader['train']):
-            enc_input = data["enc_input"]
-            enc_length = data["enc_length"]
-            dec_input = data["dec_input"]
-            dec_length = data["dec_length"]
-            targets = data["targets"]
-            index = data["index"]
-
-            logits = model(enc_input, enc_length, dec_input, dec_length)
-            logits = logits.index_select(dim=-1, index=verbalizer)
-            logits = logits[torch.where(index==1)]
-
-            loss = loss_func(logits, targets)
-            global_loss = bmt.sum_loss(loss).item()
-
-            optim_manager.zero_grad()
-
-            optim_manager.backward(loss)
-            grad_norm = optim_manager.clip_grad_norm(optimizer.param_groups, args.clip_grad, norm_type = 2)
-
-            optim_manager.step()
-
-            bmt.print_rank(
-                "train | epoch {:3d} | Iter: {:6d}/{:6d} | loss: {:.4f} | lr: {:.4e}, scale: {:10.4f} | grad_norm: {:.4f} |".format(
-                    epoch,
-                    it,
-                    len(dataloader["train"]),
-                    global_loss,
-                    lr_scheduler.current_lr,
-                    int(optim_manager.loss_scale),
-                    grad_norm,
-                )
-            )
-            # if it % args.inspect_iters == 0: print_inspect(model, "*")
-            # if args.save != None and it % args.save_iters == 0:
-            #     bmt.save(model, os.path.join(args.save, args.save_name+("-%d.pt" % it)))
-
-        model.eval()
-        with torch.no_grad():
-            acc = 0
-            total = 0
-            for it, data in enumerate(dataloader['dev']):
-                enc_input = data["enc_input"]
-                enc_length = data["enc_length"]
-                dec_input = data["dec_input"]
-                dec_length = data["dec_length"]
-                targets = data["targets"]
-                index = data["index"]
-
-                logits = model(enc_input, enc_length, dec_input, dec_length)
-                logits = logits.index_select(dim=-1, index=verbalizer)
-                logits = logits[torch.where(index==1)]
-                logits = logits.argmax(dim=-1)
-
-                acc += torch.sum(logits == targets).item()
-                total += logits.shape[0]
-                bmt.print_rank(
-                    "dev | epoch {:3d} | Iter: {:6d}/{:6d} | acc: {:6d} | total: {:6d} |".format(
-                        epoch,
-                        it,
len(dataloader["dev"]), - acc, - total, - ) - ) - acc = torch.tensor(acc / total).cuda() - acc = bmt.sum_loss(acc).cpu().item() - bmt.print_rank(f"dev epoch {epoch}: accuracy: {acc}") - -def main(): - args = initialize() - tokenizer, model, optimizer, lr_scheduler = setup_model_and_optimizer(args) - dataset, verbalizer = prepare_dataset( - args, - tokenizer, - f"{args.base_path}/down_data/paraphrase", - args.dataset_name, - bmt.rank(), bmt.world_size(), - ) - finetune(args, tokenizer, model, optimizer, lr_scheduler, dataset, verbalizer) - -if __name__ == "__main__": - main() diff --git a/unittest/test_half_precision.py b/unittest/test_half_precision.py deleted file mode 100644 index e69de29..0000000 diff --git a/unittest/test_main.py b/unittest/test_main.py deleted file mode 100644 index 139597f..0000000 --- a/unittest/test_main.py +++ /dev/null @@ -1,2 +0,0 @@ - - diff --git a/unittest/user_defined.py b/unittest/user_defined.py deleted file mode 100644 index cd49e8b..0000000 --- a/unittest/user_defined.py +++ /dev/null @@ -1,182 +0,0 @@ -# Adapted from Tevatron (https://github.com/texttron/tevatron) - -from argparse import ArgumentParser -import logging -import os -import sys -import torch.nn as nn - -logger = logging.getLogger(__name__) - - -class UnitTest: - def __init__(self, models): - self.models = models - - self.Configs = {} - self.Configs[0] = { - "delta_type": "lora", - } - - self.Configs[1] = { - "delta_type": "bitfit", - } - - self.Configs[2] = { - "delta_type": "adapter", - } - - self.Configs[3] = { - "delta_type": "compacter", - } - - self.Configs[4] = { - "delta_type": "prefix", - } - - self.Configs[5] = { - "delta_type": "soft_prompt", - } - - self.Configs[6] = { - "delta_type": "low_rank_adapter", - } - - def get_delta_config(self, config_id): - return self.Configs[config_id] - - - def unitTest0(self, delta_config_dict): - model = self.models[0] - from opendelta import Visualization - Visualization(model).structure_graph() - - from opendelta import AutoDeltaConfig, AutoDeltaModel - - delta_config = AutoDeltaConfig.from_dict(delta_config_dict) - delta_model = AutoDeltaModel.from_config(delta_config, backbone_model = model) - - from opendelta import Visualization - Visualization(model).structure_graph() - - def unitTest1(self, delta_config_dict): - class Mymodel(nn.Module): - def __init__(self, a,b): - super().__init__() - self.a = a - self.b = b - - model = Mymodel(self.models[0], self.models[1]) - from opendelta import Visualization - Visualization(model).structure_graph() - - from opendelta import AutoDeltaConfig, AutoDeltaModel - - delta_config = AutoDeltaConfig.from_dict(delta_config_dict) - delta_model = AutoDeltaModel.from_config(delta_config, backbone_model = model) - - from opendelta import Visualization - Visualization(model).structure_graph() - delta_model.save_finetuned("./tmp") - - delta_model.freeze_module(exclude=['deltas']) - delta_model.save_finetuned("./tmp") - - model = Mymodel(self.models[0], self.models[1]) - Visualization(model).structure_graph() - delta_model = AutoDeltaModel.from_finetuned("./tmp", backbone_model=model) - Visualization(model).structure_graph() - - - - - - - - def unit_test(self, test_id, config_id): - delta_config_dict = self.Configs[config_id] - if test_id == 0: - self.unitTest0(delta_config_dict) - elif test_id == 1: - self.unitTest1(delta_config_dict) - - -from dataclasses import dataclass, field - -@dataclass -class UnitTestArguments: - """ - Arguments pertaining to which model/config/tokenizer we are going to fine-tune from. 
- """ - config_id: int = field( - default=0, - ) - test_id: int = field( - default=0, - ) - model_name_or_path: str =field( - default='bert-base-cased', - metadata={"help": "tested: bert-base-cased, roberta-base, rinna/japanese-gpt2-small, t5-small, facebook/opt-125m"} - ) - - -from transformers import HfArgumentParser,TrainingArguments, AutoModel, GPT2Model - -def main(): - parser = HfArgumentParser((TrainingArguments, UnitTestArguments)) - - - if len(sys.argv) == 2 and sys.argv[1].endswith(".json"): - training_args, unit_test_args = parser.parse_json_file(json_file=os.path.abspath(sys.argv[1])) - else: - training_args, unit_test_args = parser.parse_args_into_dataclasses() - training_args: TrainingArguments - - if ( - os.path.exists(training_args.output_dir) - and os.listdir(training_args.output_dir) - and training_args.do_train - and not training_args.overwrite_output_dir - ): - raise ValueError( - f"Output directory ({training_args.output_dir}) already exists and is not empty. Use --overwrite_output_dir to overcome." - ) - - # Setup logging - logging.basicConfig( - format="%(asctime)s - %(levelname)s - %(name)s - %(message)s", - datefmt="%m/%d/%Y %H:%M:%S", - level=logging.INFO if training_args.local_rank in [-1, 0] else logging.WARN, - ) - logger.warning( - "Process rank: %s, device: %s, n_gpu: %s, distributed training: %s, 16-bits training: %s", - training_args.local_rank, - training_args.device, - training_args.n_gpu, - bool(training_args.local_rank != -1), - training_args.fp16, - ) - logger.info("Training/evaluation parameters %s", training_args) - - - model = AutoModel.from_pretrained(unit_test_args.model_name_or_path) - - import torch - import copy - models = [model, copy.deepcopy(model)] - - - unit_test = UnitTest(models) - - - unit_test.unit_test(unit_test_args.test_id, unit_test_args.config_id) - - - - - - - - -if __name__ == "__main__": - main()