From 561c1de0e559f7171e1292326512c3b6494b88ee Mon Sep 17 00:00:00 2001 From: guozr Date: Thu, 19 Sep 2024 14:54:03 +0800 Subject: [PATCH] npu adapted --- examples/examples_prompt/configs/gen_t5.py | 2 + .../examples_prompt/data_processors/tasks.py | 2 +- examples/examples_prompt/src/run_npu.py | 357 ++++++++++++++++++ examples/tutorial/0_regex.py | 2 +- examples/tutorial/1_with_openprompt_npu.py | 162 ++++++++ opendelta/utils/inspect.py | 22 +- requirements.txt | 2 +- 7 files changed, 543 insertions(+), 6 deletions(-) create mode 100644 examples/examples_prompt/src/run_npu.py create mode 100644 examples/tutorial/1_with_openprompt_npu.py diff --git a/examples/examples_prompt/configs/gen_t5.py b/examples/examples_prompt/configs/gen_t5.py index 7040fb6..c345a9d 100644 --- a/examples/examples_prompt/configs/gen_t5.py +++ b/examples/examples_prompt/configs/gen_t5.py @@ -3,6 +3,7 @@ import copy PATHBASE="/mnt/sfs_turbo/hsd/plm_cache/" # PATHBASE="/home/hushengding/plm_cache/" +PATHBASE="/home/guozr/Downloads/" AllConfigs = {} @@ -50,6 +51,7 @@ BaseConfigs['t5-base'] = { "save_strategy": "steps", "datasets_load_from_disk": True, "datasets_saved_path": "/mnt/sfs_turbo/hsd/huggingface_datasets/saved_to_disk/", + "datasets_saved_path": f"{PATHBASE}huggingface_datasets/saved_to_disk/", "backbone_model": "t5", # use in delta center, "model_path_public": "t5-base", # use in delta center, diff --git a/examples/examples_prompt/data_processors/tasks.py b/examples/examples_prompt/data_processors/tasks.py index 7d0402a..79d67f5 100644 --- a/examples/examples_prompt/data_processors/tasks.py +++ b/examples/examples_prompt/data_processors/tasks.py @@ -366,7 +366,7 @@ class SuperGLUECB(AbstractTask): if offline == '1': return datasets.load_from_disk(f"{self.data_args.datasets_saved_path}/super_glue.cb")[split] else: - return datasets.load_dataset('super_glue', 'cb', split=split, script_version="master") + return datasets.load_dataset('super_glue', 'cb', split=split) class SuperGLUECOPA(AbstractTask): diff --git a/examples/examples_prompt/src/run_npu.py b/examples/examples_prompt/src/run_npu.py new file mode 100644 index 0000000..e4ded02 --- /dev/null +++ b/examples/examples_prompt/src/run_npu.py @@ -0,0 +1,357 @@ +# coding=utf-8 +# Copyright OpenDelta Team and THUNLP lab. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +A unified runing scripts for most models to do down stream tasks in a +prompt learning fashion, i.e., No classification head, all tasks are casted +to mask prediction or span prediction tasks. + +Processing relevant to different backbone models are stored in ../backbones/ + +Adding A few lines to integrate the Delta tuning methods. + +You can also adapt this script on your own tasks. 
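+
+NPU note: this script additionally imports torch_npu together with a
+transfer_to_npu shim and selects the Ascend device via ASCEND_RT_VISIBLE_DEVICES.
+The torch.cuda timing/synchronization calls further below are kept as-is on the
+assumption that transfer_to_npu redirects them to the NPU backend.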
+""" + +import os +import sys + +os.environ['MKL_THREADING_LAYER'] = 'GNU' +os.environ['MKL_SERVICE_FORCE_INTEL'] = '1' +os.environ["TOKENIZERS_PARALLELISM"] = "false" +sys.path.append(os.path.join(os.getcwd(), "../")) +# sys.path.append(os.path.join(os.getcwd(), "/mnt/sfs_turbo/zhangzhen/OpenDelta")) +sys.path.append(os.path.join(os.getcwd())) +os.environ['ASCEND_RT_VISIBLE_DEVICES'] = '0' # https://support.huaweicloud.com/bestpractice-modelarts/modelarts_10_4007.html + +import functools +import logging +import torch +import json +import numpy as np + +import transformers +from transformers import ( + AutoConfig, + AutoModelForMaskedLM, + AutoModelForSeq2SeqLM, + AutoTokenizer, + DataCollatorForSeq2Seq, + # HfArgumentParser, + # MBartTokenizer, + # default_data_collator, + Trainer, + Seq2SeqTrainer, + set_seed, +) +from transformers.trainer_utils import is_main_process, get_last_checkpoint + +from data_processors import AutoTask #, #TaskDataCollatorForSeq2Seq, AutoPostProcessor, data_collator +from utils import read_json, save_json +from utils.args import ModelArguments, TrainingArguments, DataTrainingArguments, DeltaArguments, RemainArgHfArgumentParser +import torch_npu +import transfer_to_npu + +logger = logging.getLogger(__name__) + + +def main(): + # See all possible arguments in src/transformers/training_args.py + # or by passing the --help flag to this script. + # We now keep distinct sets of args, for a cleaner separation of concerns. + parser = RemainArgHfArgumentParser((ModelArguments, DataTrainingArguments, TrainingArguments, DeltaArguments)) + + # You can provide a json file with contains the arguments and use the --argument some_arg to override or append to the json file. + json_file, cmd_args = (os.path.abspath(sys.argv[1]), sys.argv[2:]) if sys.argv[1].endswith(".json") else (None, sys.argv[1:]) + model_args, data_args, training_args, delta_args, remain_args = parser.parse_json_file_with_cmd_args(json_file=json_file, command_line_args=cmd_args) + logger.warning("The following arguments not used! {}".format(remain_args)) + + logger.info(f"The results will be used in {training_args.output_dir}/results.json") + # exit() + # Detecting last checkpoint. + last_checkpoint = None + if os.path.isdir(training_args.output_dir) and training_args.do_train and not training_args.overwrite_output_dir: + last_checkpoint = get_last_checkpoint(training_args.output_dir) + print("#### last_checkpoint ", last_checkpoint) + if last_checkpoint is None and len(os.listdir(training_args.output_dir)) > 0: + ''' + raise ValueError( + f"Output directory ({training_args.output_dir}) already exists and is not empty. " + "Use --overwrite_output_dir to overcome." + ) + ''' + pass + elif last_checkpoint is not None: + logger.info( + f"Checkpoint detected, resuming training at {last_checkpoint}. To avoid this behavior, change " + "the `--output_dir` or add `--overwrite_output_dir` to train from scratch." 
+ ) + + # Setup logging + logging.basicConfig( + format="%(asctime)s - %(levelname)s - %(name)s - %(message)s", + datefmt="%m/%d/%Y %H:%M:%S", + handlers=[logging.StreamHandler(sys.stdout)], + ) + logger.setLevel(logging.INFO if is_main_process(training_args.local_rank) else logging.WARN) + + # Log on each process the small summary: + logger.warning( + f"Process rank: {training_args.local_rank}, device: {training_args.device}, n_gpu: {training_args.n_gpu}" + + f"distributed training: {bool(training_args.local_rank != -1)}, 16-bits training: {training_args.fp16}" + ) + # Set the verbosity to info of the Transformers logger (on main process only): + if is_main_process(training_args.local_rank): + transformers.utils.logging.set_verbosity_info() + # logger.info("Training/evaluation parameters %s", training_args, model_args, data_args, delta_args) + logger.info("{}\n{}\n{}\n{}".format(training_args, model_args, data_args, delta_args)) + + + # Set seed before initializing model. + set_seed(training_args.seed) + + + + if os.path.basename(model_args.model_name_or_path).startswith("t5") \ + or os.path.basename(model_args.model_name_or_path).startswith("long-t5") : + from examples_prompt.backbones.t5 import get_backbone, preprocess_function, mask_token_func, get_remove_columns, get_prompts + from examples_prompt.backbones.t5 import Trainer, DataCollator + elif os.path.basename(model_args.model_name_or_path).startswith("blenderbot"): + from examples_prompt.backbones.blenderbot import get_backbone, preprocess_function, mask_token_func, get_remove_columns, get_prompts + from examples_prompt.backbones.blenderbot import Trainer, DataCollator + elif os.path.basename(model_args.model_name_or_path).startswith("roberta") \ + or os.path.basename(model_args.model_name_or_path).startswith("bert") \ + or os.path.basename(model_args.model_name_or_path).startswith("albert") \ + or os.path.basename(model_args.model_name_or_path).startswith("xlm-roberta") \ + or os.path.basename(model_args.model_name_or_path).startswith("deberta") : + from examples_prompt.backbones.bert import get_backbone, preprocess_function, mask_token_func, get_remove_columns, get_prompts + from examples_prompt.backbones.bert import Trainer, DataCollator + elif os.path.basename(model_args.model_name_or_path).startswith("beit"): + from examples_prompt.backbones.beit import get_backbone, preprocess_function, mask_token_func, get_remove_columns, get_prompts + from examples_prompt.backbones.beit import Trainer, DataCollator + elif os.path.basename(model_args.model_name_or_path).startswith("bart"): + from examples_prompt.backbones.bart import get_backbone, preprocess_function, mask_token_func, get_remove_columns, get_prompts + from examples_prompt.backbones.bart import Trainer, DataCollator + elif os.path.basename(model_args.model_name_or_path).startswith("bigbird"): + from examples_prompt.backbones.bigbird import get_backbone, preprocess_function, mask_token_func, get_remove_columns, get_prompts + from examples_prompt.backbones.bigbird import Trainer, DataCollator + elif os.path.basename(model_args.model_name_or_path).startswith("clip"): + from examples_prompt.backbones.clip import get_backbone, preprocess_function, mask_token_func, get_remove_columns, get_prompts + from examples_prompt.backbones.clip import Trainer, DataCollator + elif os.path.basename(model_args.model_name_or_path).startswith("opt") \ + or os.path.basename(model_args.model_name_or_path).startswith("gpt"): + from examples_prompt.backbones.opt import get_backbone, 
preprocess_function, mask_token_func, get_remove_columns, get_prompts + from examples_prompt.backbones.opt import Trainer, DataCollator + + + + + + config, tokenizer, model = get_backbone(model_args=model_args) + + # model parallelize + if hasattr(training_args, "model_parallel") and training_args.model_parallel: + logger.info('parallelize model!') + model.parallelize() + + from bigmodelvis import Visualization + Visualization(model).structure_graph() + + if delta_args.delta_type.lower() != "none": + from opendelta import AutoDeltaConfig,AutoDeltaModel + from dataclasses import asdict + delta_config = AutoDeltaConfig.from_dict(asdict(delta_args)) + delta_model = AutoDeltaModel.from_config(delta_config, backbone_model=model) + delta_model.freeze_module(set_state_dict = True) + delta_model.log(delta_ratio=True, trainable_ratio=True, visualization=True) + + + + + + performance_metrics = {} + + + + + non_empty_splits_names = [] + if training_args.do_train: + non_empty_splits_names.append("train") + if training_args.do_eval: + non_empty_splits_names.append("eval") + if training_args.do_test: + non_empty_splits_names.append("test") + splits = {} + for split_name in ['train', 'eval', 'test']: + if split_name not in non_empty_splits_names: + splits[split_name] = None + continue + + task = AutoTask.get(data_args.task_name, + data_args.dataset_config_name, + data_args=data_args, + seed=data_args.data_sample_seed) + + dataset = task.get(split=split_name, + split_validation_test=training_args.split_validation_test, + n_obs=data_args.max_train_samples) + + + + template, _verbalizer, tokenizer_wrapper = get_prompts(task, tokenizer, data_args) + + + dataset = dataset.map( + functools.partial(preprocess_function, + data_args=data_args, + tokenizer=tokenizer, + template=template, + verbalizer=_verbalizer, + tokenizer_wrapper=tokenizer_wrapper, + split=split_name), + batched=False, + num_proc=data_args.preprocessing_num_workers, + remove_columns=get_remove_columns(list(dataset.features.keys())), + load_from_cache_file=not data_args.overwrite_cache, + ) + # from IPython import embed; embed() + splits[split_name] = dataset + if split_name == "eval": + eval_task = task + verbalizer = _verbalizer + + + + trainer = Trainer( + model=model, + verbalizer=verbalizer, + eval_task=eval_task, + args=training_args, + train_dataset=splits['train'], + eval_dataset=splits['eval'], + tokenizer=tokenizer, + data_collator=DataCollator(tokenizer), + ) + + + def save_training_config(config_file, output_dir): + json_data = read_json(config_file) + save_json(os.path.join(output_dir, "training_config.json"), json_data) + + + # Saves training config. 
+ if trainer.is_world_process_zero(): + save_training_config(sys.argv[1], training_args.output_dir) + + # Training + if training_args.do_train: + checkpoint = None + if training_args.resume_from_checkpoint is not None: + checkpoint = training_args.resume_from_checkpoint + elif last_checkpoint is not None: + checkpoint = last_checkpoint + + if training_args.compute_time: + torch.cuda.synchronize() # wait for move to complete + start = torch.cuda.Event(enable_timing=True) + end = torch.cuda.Event(enable_timing=True) + start.record() + + train_result = trainer.train(resume_from_checkpoint=checkpoint) + + if training_args.compute_time: + end.record() + torch.cuda.synchronize() # wait for all_reduce to complete + total_time = start.elapsed_time(end)/(1000*60) + performance_metrics.update({"total_time in minutes ": total_time}) + + trainer.save_model() # Saves the tokenizer too for easy upload + train_metrics = train_result.metrics + max_train_samples = ( + data_args.max_train_samples if data_args.max_train_samples is not None else len(splits['train']) + ) + train_metrics["train_samples"] = min(max_train_samples, len(splits['train'])) + trainer.log_metrics("train", train_metrics) + trainer.save_metrics("train", train_metrics) + trainer.save_state() + + if torch.cuda.is_available() and training_args.compute_memory: + peak_memory = (torch.cuda.max_memory_allocated() / 1024 ** 2)/1000 + performance_metrics.update({"peak_memory": peak_memory}) + if training_args.compute_memory or training_args.compute_time: + logger.info("Efficiency Statistics {}".format(performance_metrics)) + trainer.save_metrics("performance", performance_metrics) + + # Evaluation + all_results = {} + + all_results['evaluate'] = {} + + if training_args.do_eval: + logger.info("*** Evaluate ***") + + metrics = trainer.evaluate(eval_dataset=splits['eval'], + ) + trainer.log_metrics(f"{data_args.task_name}_eval", metrics) + trainer.save_metrics(f"{data_args.task_name}_eval", metrics) + all_results['evaluate'][data_args.task_name] = metrics + + # Test + all_results['test'] = {} + if training_args.do_test: + logger.info("*** Test ***") + metrics = trainer.evaluate(eval_dataset=splits['test'], + metric_key_prefix="test" + ) + trainer.log_metrics(f"{data_args.task_name}_test", metrics) + trainer.save_metrics(f"{data_args.task_name}_test", metrics) + all_results['test'][data_args.task_name] = metrics + + # from opendelta.utils.delta_hub import create_hub_repo_name + # from opendelta.utils.delta_center import create_delta_center_args, create_repo_name + + # repo_name = create_hub_repo_name(root="DeltaHub", + # dataset=data_args.task_name, + # delta_type = delta_args.delta_type, + # model_name_or_path= model_args.model_name_or_path) + + # center_args = + # repo_name = create_repo_name(prefix="", center_args=center_args) + # all_results['repo_name'] = repo_name + + + delta_model.save_finetuned(finetuned_delta_path=delta_args.finetuned_delta_path, + push_to_dc=training_args.push_to_dc, + center_args={"test_performance":all_results['test'][data_args.task_name]['test_average_metrics'], + }, + center_args_pool = {**vars(model_args), **vars(data_args), **vars(training_args), **vars(delta_args)}, + list_tags = ['NLI'], + dict_tags = {'purpose':'for testing'}, + delay_push=True, + test_result=all_results['test'] + ) + + + + with open(f"{training_args.output_dir}/results.json", 'w') as fout: + string = json.dumps(all_results, indent=4,sort_keys=True) + fout.write(string+"\n") + + return all_results + + + + +if __name__ == "__main__": + result = 
main() diff --git a/examples/tutorial/0_regex.py b/examples/tutorial/0_regex.py index 6d884d7..af7c216 100644 --- a/examples/tutorial/0_regex.py +++ b/examples/tutorial/0_regex.py @@ -2,7 +2,7 @@ from transformers import AutoModelForSequenceClassification model = AutoModelForSequenceClassification.from_pretrained("roberta-base") # suppose we load BART -from opendelta import Visualization +from bigmodelvis import Visualization print("before modify") Visualization(model).structure_graph() diff --git a/examples/tutorial/1_with_openprompt_npu.py b/examples/tutorial/1_with_openprompt_npu.py new file mode 100644 index 0000000..5ba5683 --- /dev/null +++ b/examples/tutorial/1_with_openprompt_npu.py @@ -0,0 +1,162 @@ +""" +This tutorial is a copy of OpenPrompt's tutorial/1.1_mixed_template.py +The only modification is in lines 98 to 102 + +1. OpenPrompt provides pre-processing of data, such as prompt template formatting +2. OpenPrompt pre-process the model input, such as: prompt soft embedding +3. OpenDelta modify the backbone model, such as: Adapter, Lora, Compactor, etc. +4. OpenPrompt post-process the model output, such as: extract logits at position, apply prompt verbalizer +""" + +# load dataset +from datasets import load_dataset +from datasets import load_from_disk +raw_dataset = load_dataset('super_glue', 'cb', + # cache_dir="../datasets/.cache/huggingface_datasets" + ) +# raw_dataset = load_from_disk("/home/hx/huggingface_datasets/saved_to_disk/super_glue.cb") +# Note that if you are running this scripts inside a GPU cluster, there are chances are you are not able to connect to huggingface website directly. +# In this case, we recommend you to run `raw_dataset = load_dataset(...)` on some machine that have internet connections. +# Then use `raw_dataset.save_to_disk(path)` method to save to local path. +# Thirdly upload the saved content into the machiine in cluster. +# Then use `load_from_disk` method to load the dataset. + +from openprompt.data_utils import InputExample + +dataset = {} +for split in ['train', 'validation', 'test']: + dataset[split] = [] + for data in raw_dataset[split]: + input_example = InputExample(text_a = data['premise'], text_b = data['hypothesis'], label=int(data['label']), guid=data['idx']) + dataset[split].append(input_example) +print(dataset['train'][0]) + +# You can load the plm related things provided by openprompt simply by calling: +from openprompt.plms import load_plm +plm, tokenizer, model_config, WrapperClass = load_plm("t5", "t5-base") + +# Constructing Template +# A template can be constructed from the yaml config, but it can also be constructed by directly passing arguments. +from openprompt.prompts import MixedTemplate +template_text = '{"placeholder":"text_a"} {"soft"} {"soft"} {"soft"} {"placeholder":"text_b"}? {"soft"} {"soft"} {"soft"} {"mask"}.' +mytemplate = MixedTemplate(model=plm, tokenizer=tokenizer, text=template_text) + +# To better understand how does the template wrap the example, we visualize one instance. + +wrapped_example = mytemplate.wrap_one_example(dataset['train'][0]) +print(wrapped_example) + +# Now, the wrapped example is ready to be pass into the tokenizer, hence producing the input for language models. +# You can use the tokenizer to tokenize the input by yourself, but we recommend using our wrapped tokenizer, which is a wrapped tokenizer tailed for InputExample. 
+# The wrapper has been given if you use our `load_plm` function, otherwise, you should choose the suitable wrapper based on +# the configuration in `openprompt.plms.__init__.py`. +# Note that when t5 is used for classification, we only need to pass to decoder. +# The loss is calcaluted at . Thus passing decoder_max_length=3 saves the space +wrapped_t5tokenizer = WrapperClass(max_seq_length=128, decoder_max_length=3, tokenizer=tokenizer,truncate_method="head") +# or +from openprompt.plms import T5TokenizerWrapper +wrapped_t5tokenizer= T5TokenizerWrapper(max_seq_length=128, decoder_max_length=3, tokenizer=tokenizer,truncate_method="head") + +# You can see what a tokenized example looks like by +tokenized_example = wrapped_t5tokenizer.tokenize_one_example(wrapped_example, teacher_forcing=False) +print(tokenized_example) +print(tokenizer.convert_ids_to_tokens(tokenized_example['input_ids'])) +print(tokenizer.convert_ids_to_tokens(tokenized_example['decoder_input_ids'])) + +# Now it's time to convert the whole dataset into the input format! +# Simply loop over the dataset to achieve it! + +model_inputs = {} +for split in ['train', 'validation', 'test']: + model_inputs[split] = [] + for sample in dataset[split]: + tokenized_example = wrapped_t5tokenizer.tokenize_one_example(mytemplate.wrap_one_example(sample), teacher_forcing=False) + model_inputs[split].append(tokenized_example) + + +# We provide a `PromptDataLoader` class to help you do all the above matters and wrap them into an `torch.DataLoader` style iterator. +from openprompt import PromptDataLoader + +train_dataloader = PromptDataLoader(dataset=dataset["train"], template=mytemplate, tokenizer=tokenizer, + tokenizer_wrapper_class=WrapperClass, max_seq_length=256, decoder_max_length=3, + batch_size=4,shuffle=True, teacher_forcing=False, predict_eos_token=False, + truncate_method="head") + + +# Define the verbalizer +# In classification, you need to define your verbalizer, which is a mapping from logits on the vocabulary to the final label probability. 
Let's have a look at the verbalizer details: + +from openprompt.prompts import ManualVerbalizer +import torch + +# for example the verbalizer contains multiple label words in each class +myverbalizer = ManualVerbalizer(tokenizer, num_classes=3, label_words=[["yes"], ["no"], ["maybe"]]) + +print("label_words_ids", myverbalizer.label_words_ids) + +# Although you can manually combine the plm, template, verbalizer together, we provide a pipeline +# model which take the batched data from the PromptDataLoader and produce a class-wise logits + +from opendelta import LoraModel +# delta_model = LoraModel(backbone_model=plm, modified_modules=[]) +delta_model = LoraModel(backbone_model=plm, modified_modules=["SelfAttention.q", "SelfAttention.v"]) +delta_model.freeze_module(exclude=["deltas"], set_state_dict=True) +delta_model.log() + +from openprompt import PromptForClassification + +use_npu = True +prompt_model = PromptForClassification(plm=plm, template=mytemplate, verbalizer=myverbalizer) +if use_npu : + prompt_model = prompt_model.npu() + +# Now the training is standard +from transformers import AdamW, get_linear_schedule_with_warmup +loss_func = torch.nn.CrossEntropyLoss() +no_decay = ['bias', 'LayerNorm.weight'] +# it's always good practice to set no decay to biase and LayerNorm parameters +optimizer_grouped_parameters = [ + {'params': [p for n, p in prompt_model.named_parameters() if not any(nd in n for nd in no_decay)], 'weight_decay': 0.01}, + {'params': [p for n, p in prompt_model.named_parameters() if any(nd in n for nd in no_decay)], 'weight_decay': 0.0} +] +print([n for n, p in prompt_model.named_parameters()]) + +optimizer = AdamW(optimizer_grouped_parameters, lr=1e-4) + +for epoch in range(30): + tot_loss = 0 + for step, inputs in enumerate(train_dataloader): + if use_npu: + # The inputs instance is of type InputFeature, which inherits from dict. + # The to() method can move it to other devices. The cuda() method is a wrapper for to(), specifically moving to CUDA devices. + # If you want to move it to an NPU device, you can directly use the underlying to() method. 
+ inputs = inputs.to("npu") + delta_model.log() + logits = prompt_model(inputs) + labels = inputs['label'] + loss = loss_func(logits, labels) + loss.backward() + tot_loss += loss.item() + optimizer.step() + optimizer.zero_grad() + if step %100 ==1: + print("Epoch {}, average loss: {}".format(epoch, tot_loss/(step+1)), flush=True) + +# Evaluate +validation_dataloader = PromptDataLoader(dataset=dataset["validation"], template=mytemplate, tokenizer=tokenizer, + tokenizer_wrapper_class=WrapperClass, max_seq_length=256, decoder_max_length=3, + batch_size=4,shuffle=False, teacher_forcing=False, predict_eos_token=False, + truncate_method="head") + +allpreds = [] +alllabels = [] +for step, inputs in enumerate(validation_dataloader): + if use_npu: + inputs = inputs.to("npu") + logits = prompt_model(inputs) + labels = inputs['label'] + alllabels.extend(labels.cpu().tolist()) + allpreds.extend(torch.argmax(logits, dim=-1).cpu().tolist()) + +acc = sum([int(i==j) for i,j in zip(allpreds, alllabels)])/len(allpreds) +print(acc) \ No newline at end of file diff --git a/opendelta/utils/inspect.py b/opendelta/utils/inspect.py index 830298e..c900d1d 100644 --- a/opendelta/utils/inspect.py +++ b/opendelta/utils/inspect.py @@ -3,9 +3,20 @@ import torch import torch.nn as nn from typing import Optional import opendelta.utils.logging as logging +import importlib logger = logging.get_logger(__name__) +def is_torch_npu_available(): + if importlib.util.find_spec("torch_npu") is None: + return False + + import torch + import torch_npu + + return hasattr(torch, "npu") and torch.npu.is_available() + + def inspect_module_statistics(module: Optional[nn.Module]=None, verbose=True): r"""Get the statistics of the parameters in the delta modules. @@ -34,9 +45,14 @@ def inspect_module_statistics(module: Optional[nn.Module]=None, verbose=True): cudamem = 0 maxcudamem = 0 - for device_id in range(torch.cuda.device_count()): - cudamem += torch.cuda.memory_allocated(f"cuda:{device_id}")/1024**3 - maxcudamem += torch.cuda.max_memory_allocated(f"cuda:{device_id}")/1024**3 + if is_torch_npu_available(): + for device_id in range(torch.npu.device_count()): + cudamem += torch.npu.memory_allocated(f"npu:{device_id}")/1024**3 + maxcudamem += torch.npu.max_memory_allocated(f"npu:{device_id}")/1024**3 + else: + for device_id in range(torch.cuda.device_count()): + cudamem += torch.cuda.memory_allocated(f"cuda:{device_id}")/1024**3 + maxcudamem += torch.cuda.max_memory_allocated(f"cuda:{device_id}")/1024**3 stat['cudamem'] = cudamem stat['maxcudamem'] = maxcudamem diff --git a/requirements.txt b/requirements.txt index 2862394..d18e55b 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,5 @@ torch>=1.8.0 -transformers>=4.10.0 +transformers>=4.10.0,<=4.27.1 datasets>=1.17.0 sentencepiece>=0.1.96 tqdm>=4.62.2
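
Usage sketch (not part of the diff above): the is_torch_npu_available() check added in
opendelta/utils/inspect.py can also be reused to keep calling code device-agnostic.
In the sketch below, pick_device() and accelerator_memory_gib() are hypothetical helpers,
not functions introduced by this patch; they only mirror the patch's own logic and assume,
as the inspect.py hunk already does, that torch.npu exposes the same memory-accounting
API as torch.cuda once torch_npu is imported.

import importlib.util

import torch


def is_torch_npu_available() -> bool:
    # Same check as the helper added in opendelta/utils/inspect.py:
    # torch_npu must be importable and must register the torch.npu namespace.
    if importlib.util.find_spec("torch_npu") is None:
        return False
    import torch_npu  # noqa: F401
    return hasattr(torch, "npu") and torch.npu.is_available()


def pick_device() -> str:
    # Hypothetical helper: prefer an Ascend NPU, then CUDA, then CPU.
    if is_torch_npu_available():
        return "npu"
    if torch.cuda.is_available():
        return "cuda"
    return "cpu"


def accelerator_memory_gib(device_type: str) -> float:
    # Mirrors the per-device accounting in inspect_module_statistics();
    # only meaningful for "npu" or "cuda", call after pick_device().
    backend = torch.npu if device_type == "npu" else torch.cuda
    total = 0.0
    for device_id in range(backend.device_count()):
        total += backend.memory_allocated(f"{device_type}:{device_id}") / 1024 ** 3
    return total

A script like run_npu.py or the OpenPrompt tutorial could then write
device = pick_device(); prompt_model = prompt_model.to(device) instead of
hard-coding .npu(), falling back gracefully on machines without torch_npu.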