resolve conflict
This commit is contained in:
commit
357c74ef59
|
@ -50,4 +50,6 @@ scripts
|
|||
t.py
|
||||
t.sh
|
||||
!examples/examples_prompt/configs/*/*.json
|
||||
!examples/examples_prompt/configs/**
|
||||
!examples/examples_prompt/configs/**
|
||||
**/delta_checkpoints/
|
||||
**/outputs/
|
||||
|
|
|
@ -72,6 +72,11 @@ python setup.py install
|
|||
python setup.py develop
|
||||
```
|
||||
|
||||
If you encounter a network error when using setup.py, please first install the dependencies via
|
||||
```shell
|
||||
pip install -r requirements.txt && python setup.py develop
|
||||
```
|
||||
|
||||
## Must Try
|
||||
|
||||
```python
|
||||
|
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
|
@ -1,24 +1,59 @@
|
|||
# !!!!This example collection is still under develop, please wait for some time to use it.
|
||||
# Examples of using opendelta together with 🤗 transformers.
|
||||
|
||||
## Install the repo
|
||||
In this repo, we construct a very general pipeline to train and test a PLM using
|
||||
🤗 transformers.
|
||||
|
||||
The pipeline was constructed together with [openpromptu](https://pypi.org/project/openpromptu/), which is a light and
|
||||
model-agnostic version of [openprompt](https://github.com/thunlp/OpenPrompt).
|
||||
|
||||
## Pool of PLMs
|
||||
We are going to adapt most of the models in 🤗 transformers
|
||||
in this repo. The different pipelines, processing steps, and configurations are specified
|
||||
in `./backbones/`. You can add your own model there to support customized models.
|
||||
|
||||
|
||||
### An example script to run the repo in offline mode
|
||||
```bash
|
||||
cd ../
|
||||
python setup_seq2seq.py develop
|
||||
conda activate [YOURENV]
|
||||
PATHBASE=[YOURPATH]
|
||||
|
||||
JOBNAME="adapter_t5-base"
|
||||
DATASET="superglue-cb"
|
||||
|
||||
cd $PATHBASE/OpenDelta/examples/examples_prompt/
|
||||
python configs/gen_t5.py --job $JOBNAME
|
||||
|
||||
export TRANSFORMERS_OFFLINE=1
|
||||
export HF_DATASETS_OFFLINE=1
|
||||
python src/run.py configs/$JOBNAME/$DATASET.json \
|
||||
--model_name_or_path [YOURPATH_TO_T5_BASE] \
|
||||
--tokenizer_name [YOURPATH_TO_T5_BASE] \
|
||||
--datasets_saved_path [YOURPATH_TO_CB_DATASETS] \
|
||||
--finetuned_delta_path ${PATHBASE}/delta_checkpoints/ \
|
||||
--num_train_epochs 20 \
|
||||
--bottleneck_dim 24 \
|
||||
--delay_push True
|
||||
```
|
||||
This will add `examples_seq2seq` to the Python path of the environment.
|
||||
|
||||
## Generating the json configuration file
|
||||
## An example of quickly testing the repo
|
||||
|
||||
```shell
|
||||
python configs/gen_$BACKBONETYPE.py --job $YOURJOB
|
||||
#e.g. python configs/gen_beit.py --job lora_beit-base-patch16-224
|
||||
```
|
||||
The available job configurations (e.g., `--job lora_beit-base-patch16-224`) can be found in the scripts. You can also
|
||||
create your own configuration.
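If you want to inspect a generated configuration before editing it, a minimal sketch is below; the file path follows the `$JOBNAME/$DATASET` convention above, and the exact keys depend on the `configs/gen_*.py` script that produced the file.

```python
import json

# Peek into a generated job configuration (path is illustrative; keys depend on gen_*.py).
with open("configs/adapter_t5-base/superglue-cb.json") as f:
    job_config = json.load(f)

for key in sorted(job_config):
    print(key, "=", job_config[key])
```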
|
||||
```bash
|
||||
conda activate [YOURENV]
|
||||
PATHBASE=[YOURPATH]
|
||||
|
||||
JOBNAME="adapter_t5-base"
|
||||
DATASET="superglue-cb"
|
||||
|
||||
## Run the code
|
||||
cd $PATHBASE/OpenDelta/examples/examples_prompt/
|
||||
|
||||
```
|
||||
CUDA_VISIBLE_DEVICES=1 python src/run.py configs/lora_beit-base-patch16-224/beans.json
|
||||
```
|
||||
export TRANSFORMERS_OFFLINE=1
|
||||
export HF_DATASETS_OFFLINE=1
|
||||
export DELTACENTER_OFFLINE=0
|
||||
python src/test.py configs/$JOBNAME/$DATASET.json \
|
||||
--model_name_or_path [YOURPATH_TO_T5_BASE] \
|
||||
--tokenizer_name [YOURPATH_TO_T5_BASE] \
|
||||
--datasets_saved_path [YOURPATH_TO_CB_DATASETS] \
|
||||
--finetuned_delta_path thunlp/t5-base_adapter_superglue-cb_20220701171436c80 \
|
||||
--delta_cache_dir "./delta_checkpoints/" \
|
||||
--force_download True
|
||||
```
|
|
@ -26,14 +26,14 @@ def preprocess_function(raw_example, **kwargs):
|
|||
example = InputExample(**raw_example)
|
||||
|
||||
|
||||
try:
|
||||
example = verbalizer.wrap_one_example(example)
|
||||
example, other = template.wrap_one_example(example)
|
||||
input_sentence = tokenizer_wrapper.merge_wrapped_example(example)
|
||||
model_inputs = tokenizer(input_sentence, max_length=256,
|
||||
padding="max_length", truncation=True)
|
||||
except:
|
||||
from IPython import embed; embed(header="Therer")
|
||||
|
||||
example = verbalizer.wrap_one_example(example)
|
||||
example, other = template.wrap_one_example(example)
|
||||
input_sentence = tokenizer_wrapper.merge_wrapped_example(example)
|
||||
model_inputs = tokenizer(input_sentence, max_length=256,
|
||||
padding="max_length", truncation=True)
|
||||
|
||||
|
||||
|
||||
with tokenizer.as_target_tokenizer():
|
||||
label = tokenizer(other['tgt_text']).input_ids
|
||||
|
|
|
@ -26,14 +26,13 @@ def preprocess_function(raw_example, **kwargs):
|
|||
example = InputExample(**raw_example)
|
||||
|
||||
|
||||
try:
|
||||
example = verbalizer.wrap_one_example(example)
|
||||
example, other = template.wrap_one_example(example)
|
||||
input_sentence = tokenizer_wrapper.merge_wrapped_example(example)
|
||||
model_inputs = tokenizer(input_sentence, max_length=data_args.max_source_length,
|
||||
padding="max_length", truncation=True)
|
||||
except:
|
||||
from IPython import embed; embed(header="Therer")
|
||||
|
||||
example = verbalizer.wrap_one_example(example)
|
||||
example, other = template.wrap_one_example(example)
|
||||
input_sentence = tokenizer_wrapper.merge_wrapped_example(example)
|
||||
model_inputs = tokenizer(input_sentence, max_length=data_args.max_source_length,
|
||||
padding="max_length", truncation=True)
|
||||
|
||||
|
||||
with tokenizer.as_target_tokenizer():
|
||||
label = tokenizer(other['tgt_text']).input_ids
|
||||
|
|
|
@ -26,14 +26,13 @@ def preprocess_function(raw_example, **kwargs):
|
|||
example = InputExample(**raw_example)
|
||||
|
||||
|
||||
try:
|
||||
example = verbalizer.wrap_one_example(example)
|
||||
example, other = template.wrap_one_example(example)
|
||||
input_sentence = tokenizer_wrapper.merge_wrapped_example(example)
|
||||
model_inputs = tokenizer(input_sentence, max_length=256,
|
||||
padding="max_length", truncation=True)
|
||||
except:
|
||||
from IPython import embed; embed(header="Therer")
|
||||
|
||||
example = verbalizer.wrap_one_example(example)
|
||||
example, other = template.wrap_one_example(example)
|
||||
input_sentence = tokenizer_wrapper.merge_wrapped_example(example)
|
||||
model_inputs = tokenizer(input_sentence, max_length=256,
|
||||
padding="max_length", truncation=True)
|
||||
|
||||
|
||||
with tokenizer.as_target_tokenizer():
|
||||
label = tokenizer(other['tgt_text']).input_ids
|
||||
|
|
|
@ -1,59 +0,0 @@
|
|||
# the final results will be populated here.{
|
||||
"evaluate": {
|
||||
"epoch": 20.0,
|
||||
"eval_accuracy": 89.2156862745098,
|
||||
"eval_average_metrics": 90.76168929110105,
|
||||
"eval_f1": 92.3076923076923,
|
||||
"eval_loss": 0.16493959724903107,
|
||||
"eval_runtime": 1.6391,
|
||||
"eval_samples_per_second": 124.455
|
||||
},
|
||||
"repo_name": "DeltaHub/bitfit_t5-base_mrpc",
|
||||
"test": {
|
||||
"epoch": 20.0,
|
||||
"test_accuracy": 88.23529411764706,
|
||||
"test_average_metrics": 89.97971602434077,
|
||||
"test_f1": 91.72413793103448,
|
||||
"test_loss": 0.14968213438987732,
|
||||
"test_runtime": 1.6344,
|
||||
"test_samples_per_second": 124.82
|
||||
}
|
||||
}
|
||||
{
|
||||
"evaluate": {
|
||||
"epoch": 20.0,
|
||||
"eval_average_metrics": 52.10265668831534,
|
||||
"eval_loss": 0.3603779077529907,
|
||||
"eval_matthews_correlation": 52.10265668831534,
|
||||
"eval_runtime": 1.0808,
|
||||
"eval_samples_per_second": 482.046
|
||||
},
|
||||
"repo_name": "DeltaHub/bitfit_t5-base_cola",
|
||||
"test": {
|
||||
"epoch": 20.0,
|
||||
"test_average_metrics": 54.209563471221934,
|
||||
"test_loss": 0.2853100299835205,
|
||||
"test_matthews_correlation": 54.209563471221934,
|
||||
"test_runtime": 1.056,
|
||||
"test_samples_per_second": 494.304
|
||||
}
|
||||
}
|
||||
{
|
||||
"evaluate": {
|
||||
"epoch": 20.0,
|
||||
"eval_average_metrics": 53.80613287067274,
|
||||
"eval_loss": 0.25723716616630554,
|
||||
"eval_matthews_correlation": 53.80613287067274,
|
||||
"eval_runtime": 1.0583,
|
||||
"eval_samples_per_second": 492.299
|
||||
},
|
||||
"repo_name": "DeltaHub/bitfit_t5-base_cola",
|
||||
"test": {
|
||||
"epoch": 20.0,
|
||||
"test_average_metrics": 54.32497579543861,
|
||||
"test_loss": 0.22327613830566406,
|
||||
"test_matthews_correlation": 54.32497579543861,
|
||||
"test_runtime": 1.0556,
|
||||
"test_samples_per_second": 494.507
|
||||
}
|
||||
}
|
|
@ -13,12 +13,11 @@ import numpy as np
|
|||
import torch
|
||||
import re
|
||||
import itertools
|
||||
|
||||
import os
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
|
||||
from transformers.models.auto.tokenization_auto import tokenizer_class_from_name
|
||||
|
||||
from typing import List, Dict
|
||||
|
@ -63,7 +62,8 @@ class COLA(AbstractTask):
|
|||
}
|
||||
|
||||
def load_dataset(self, split):
|
||||
if self.data_args.datasets_load_from_disk:
|
||||
offline = os.environ.get("HF_DATASETS_OFFLINE", "0")
|
||||
if offline == '1':
|
||||
return datasets.load_from_disk(f"{self.data_args.datasets_saved_path}/glue.cola")[split]
|
||||
else:
|
||||
return datasets.load_dataset('glue', 'cola',
|
||||
|
@ -91,7 +91,8 @@ class SST2(AbstractTask):
|
|||
}
|
||||
|
||||
def load_dataset(self, split):
|
||||
if self.data_args.datasets_load_from_disk:
|
||||
offline = os.environ.get("HF_DATASETS_OFFLINE", "0")
|
||||
if offline == '1':
|
||||
return datasets.load_from_disk(f"{self.data_args.datasets_saved_path}/glue.sst2")[split]
|
||||
else:
|
||||
return datasets.load_dataset('glue', 'sst2',
|
||||
|
@ -118,10 +119,9 @@ class MRPC(AbstractTask):
|
|||
}
|
||||
|
||||
|
||||
|
||||
|
||||
def load_dataset(self, split):
|
||||
if self.data_args.datasets_load_from_disk:
|
||||
offline = os.environ.get("HF_DATASETS_OFFLINE", "0")
|
||||
if offline == '1':
|
||||
return datasets.load_from_disk(f"{self.data_args.datasets_saved_path}/glue.mrpc")[split]
|
||||
else:
|
||||
return datasets.load_dataset('glue', 'mrpc', split=split, script_version="master")
|
||||
|
@ -147,7 +147,8 @@ class QQP(AbstractTask):
|
|||
|
||||
|
||||
def load_dataset(self, split):
|
||||
if self.data_args.datasets_load_from_disk:
|
||||
offline = os.environ.get("HF_DATASETS_OFFLINE", "0")
|
||||
if offline == '1':
|
||||
return datasets.load_from_disk(f"{self.data_args.datasets_saved_path}/glue.qqp")[split]
|
||||
else:
|
||||
return datasets.load_dataset('glue', 'qqp',
|
||||
|
@ -203,7 +204,8 @@ class MNLI(AbstractTask):
|
|||
}
|
||||
|
||||
def load_dataset(self, split):
|
||||
if self.data_args.datasets_load_from_disk:
|
||||
offline = os.environ.get("HF_DATASETS_OFFLINE", "0")
|
||||
if offline == '1':
|
||||
return datasets.load_from_disk(f"{self.data_args.datasets_saved_path}/glue.mnli")[split]
|
||||
else:
|
||||
return datasets.load_dataset('glue', 'mnli', split=split, script_version="master")
|
||||
|
@ -238,7 +240,8 @@ class QNLI(AbstractTask):
|
|||
|
||||
|
||||
def load_dataset(self, split):
|
||||
if self.data_args.datasets_load_from_disk:
|
||||
offline = os.environ.get("HF_DATASETS_OFFLINE", "0")
|
||||
if offline == '1':
|
||||
return datasets.load_from_disk(f"{self.data_args.datasets_saved_path}/glue.qnli")[split]
|
||||
else:
|
||||
return datasets.load_dataset('glue', 'qnli', split=split, script_version="master")
|
||||
|
@ -274,7 +277,8 @@ class RTE(AbstractTask):
|
|||
}
|
||||
|
||||
def load_dataset(self, split):
|
||||
if self.data_args.datasets_load_from_disk:
|
||||
offline = os.environ.get("HF_DATASETS_OFFLINE", "0")
|
||||
if offline == '1':
|
||||
return datasets.load_from_disk(f"{self.data_args.datasets_saved_path}/glue.rte")[split]
|
||||
else:
|
||||
return datasets.load_dataset('glue', 'rte',
|
||||
|
@ -301,7 +305,8 @@ class WNLI(AbstractTask):
|
|||
|
||||
|
||||
def load_dataset(self, split):
|
||||
if self.data_args.datasets_load_from_disk:
|
||||
offline = os.environ.get("HF_DATASETS_OFFLINE", "0")
|
||||
if offline == '1':
|
||||
return datasets.load_from_disk(f"{self.data_args.datasets_saved_path}/glue.wnli")[split]
|
||||
else:
|
||||
return datasets.load_dataset('glue', 'wnli', split=split, script_version="master")
|
||||
|
@ -329,7 +334,8 @@ class SuperGLUEBoolQ(AbstractTask):
|
|||
}
|
||||
|
||||
def load_dataset(self, split):
|
||||
if self.data_args.datasets_load_from_disk:
|
||||
offline = os.environ.get("HF_DATASETS_OFFLINE", "0")
|
||||
if offline == '1':
|
||||
return datasets.load_from_disk(f"{self.data_args.datasets_saved_path}/super_glue.boolq")[split]
|
||||
else:
|
||||
return datasets.load_dataset('super_glue', 'boolq', split=split, script_version="master")
|
||||
|
@ -342,7 +348,7 @@ class SuperGLUECB(AbstractTask):
|
|||
split_to_data_split = {"train": "train",
|
||||
"validation": "validation",
|
||||
"test": "validation"}
|
||||
metric = [metrics.mean_multiclass_f1(num_classes=3), metrics.accuracy]
|
||||
metric = [metrics.accuracy]
|
||||
metric_names = ["accuracy"]
|
||||
|
||||
verbalizers = {
|
||||
|
@ -356,7 +362,8 @@ class SuperGLUECB(AbstractTask):
|
|||
}
|
||||
|
||||
def load_dataset(self, split):
|
||||
if self.data_args.datasets_load_from_disk:
|
||||
offline = os.environ.get("HF_DATASETS_OFFLINE", "0")
|
||||
if offline == '1':
|
||||
return datasets.load_from_disk(f"{self.data_args.datasets_saved_path}/super_glue.cb")[split]
|
||||
else:
|
||||
return datasets.load_dataset('super_glue', 'cb', split=split, script_version="master")
|
||||
|
@ -382,7 +389,8 @@ class SuperGLUECOPA(AbstractTask):
|
|||
}
|
||||
|
||||
def load_dataset(self, split):
|
||||
if self.data_args.datasets_load_from_disk:
|
||||
offline = os.environ.get("HF_DATASETS_OFFLINE", "0")
|
||||
if offline == '1':
|
||||
return datasets.load_from_disk(f"{self.data_args.datasets_saved_path}/super_glue.copa")[split]
|
||||
else:
|
||||
return datasets.load_dataset('super_glue', 'copa', split=split, script_version="master")
|
||||
|
@ -411,7 +419,8 @@ class SuperGLUEMultiRC(AbstractTask):
|
|||
|
||||
|
||||
def load_dataset(self, split):
|
||||
if self.data_args.datasets_load_from_disk:
|
||||
offline = os.environ.get("HF_DATASETS_OFFLINE", "0")
|
||||
if offline == '1':
|
||||
return datasets.load_from_disk(f"{self.data_args.datasets_saved_path}/super_glue.multirc")[split]
|
||||
else:
|
||||
return datasets.load_dataset('super_glue', 'multirc', split=split, script_version="master")
|
||||
|
@ -454,7 +463,8 @@ class SuperGLUEWIC(AbstractTask):
|
|||
}
|
||||
|
||||
def load_dataset(self, split):
|
||||
if self.data_args.datasets_load_from_disk:
|
||||
offline = os.environ.get("HF_DATASETS_OFFLINE", "0")
|
||||
if offline == '1':
|
||||
return datasets.load_from_disk(f"{self.data_args.datasets_saved_path}/super_glue.wic")[split]
|
||||
else:
|
||||
return datasets.load_dataset('super_glue', 'wic', split=split, script_version="master")
|
||||
|
@ -544,7 +554,8 @@ class Beans(AbstractTask):
|
|||
|
||||
def load_dataset(self, split):
|
||||
# from IPython import embed; embed(header="beans")
|
||||
if self.data_args.datasets_load_from_disk:
|
||||
offline = os.environ.get("HF_DATASETS_OFFLINE", "0")
|
||||
if offline == '1':
|
||||
return datasets.load_from_disk(f"{self.data_args.datasets_saved_path}/beans")[split]
|
||||
else:
|
||||
return datasets.load_dataset('beans', split=split, script_version="master")
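Each task class above repeats the same offline/online branching. Below is a minimal sketch of that pattern in isolation; the helper name and its arguments are assumptions for illustration, not part of the repo.

```python
import os
import datasets

def load_task_dataset(data_args, hf_path, config_name, split, saved_name):
    # Illustrative helper mirroring the pattern used by every task class above.
    if data_args.datasets_load_from_disk:
        offline = os.environ.get("HF_DATASETS_OFFLINE", "0")
        if offline == "1":
            # Offline: read a dataset previously stored with Dataset.save_to_disk.
            return datasets.load_from_disk(f"{data_args.datasets_saved_path}/{saved_name}")[split]
    # Online: download from the Hugging Face hub (or reuse the HF cache).
    return datasets.load_dataset(hf_path, config_name, split=split)
```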
|
||||
|
|
|
@ -152,9 +152,13 @@ def main():
|
|||
Visualization(model).structure_graph()
|
||||
|
||||
if delta_args.delta_type.lower() != "none":
|
||||
from opendelta import AutoDeltaConfig,AutoDeltaModel
|
||||
# delta_config = AutoDeltaConfig.from_dict(vars(delta_args))
|
||||
delta_model = AutoDeltaModel.from_finetuned(finetuned_model_path=delta_args.finetuned_model_path, cache_dir="saved_ckpts", backbone_model=model)
|
||||
from opendelta.delta_models.adapter import AdapterConfig, AdapterModel
|
||||
delta_config = AdapterConfig.from_finetuned(finetuned_delta_path=delta_args.finetuned_delta_path)
|
||||
delta_model = AdapterModel.from_finetuned(finetuned_delta_path=delta_args.finetuned_delta_path,
|
||||
delta_config=delta_config,
|
||||
backbone_model=model,
|
||||
force_download=delta_args.force_download,
|
||||
cache_dir=delta_args.delta_cache_dir)
|
||||
# delta_model.freeze_module(set_state_dict = True)
|
||||
delta_model.log(delta_ratio=True, trainable_ratio=True, visualization=True)
|
||||
|
||||
|
|
|
@ -243,6 +243,21 @@ class DeltaArguments:
|
|||
finetuned_delta_path: Optional[str] = field(
|
||||
default=None, metadata={"help": "the path of the finetuned delta model"}
|
||||
)
|
||||
force_download: Optional[bool] = field(
|
||||
default=False, metadata={"help": "whether to download the checkpoint form delta center no matter whether it exists"}
|
||||
)
|
||||
local_files_only: Optional[bool] = field(
|
||||
default=False, metadata={"help": "whether not to look for file in delta center"}
|
||||
)
|
||||
delta_cache_dir: Optional[str] = field(
|
||||
default=None, metadata={"help": "The cache path defined by user. If not set, we will firstly look into the"+
|
||||
" working directory and then into the default cache path (ususally ~/.cache/delta_center)."}
|
||||
)
|
||||
delay_push: Optional[bool] = field(
|
||||
default=True, metadata={
|
||||
'help': 'whether to push the checkpoint to the delta center later.'
|
||||
}
|
||||
)
|
||||
|
||||
def merge_arguments(self, objb):
|
||||
print(objb)
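The new fields above map directly to command-line flags. A minimal sketch, assuming these dataclass fields are parsed with transformers' ``HfArgumentParser`` as in typical example scripts; the dataclass name here is hypothetical and the checkpoint id is the one used in the README.

```python
from dataclasses import dataclass, field
from typing import Optional
from transformers import HfArgumentParser

@dataclass
class DeltaArgumentsSketch:
    # Only the newly added fields are reproduced here for illustration.
    finetuned_delta_path: Optional[str] = field(default=None)
    force_download: Optional[bool] = field(default=False)
    local_files_only: Optional[bool] = field(default=False)
    delta_cache_dir: Optional[str] = field(default=None)
    delay_push: Optional[bool] = field(default=True)

parser = HfArgumentParser(DeltaArgumentsSketch)
(delta_args,) = parser.parse_args_into_dataclasses(args=[
    "--finetuned_delta_path", "thunlp/t5-base_adapter_superglue-cb_20220701171436c80",
    "--delta_cache_dir", "./delta_checkpoints/",
    "--force_download", "True",
])
print(delta_args)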
|
||||
|
|
|
@ -1,48 +0,0 @@
|
|||
"""Install Compacter."""
|
||||
import os
|
||||
import setuptools
|
||||
from torch.utils.cpp_extension import BuildExtension, CUDAExtension
|
||||
|
||||
#os.environ['TORCH_CUDA_ARCH_LIST']="3.5;3.7;6.1;7.0;7.5;8.6+PTX"
|
||||
|
||||
def setup_package():
|
||||
long_description = "examples_prompt"
|
||||
setuptools.setup(
|
||||
name='examples_prompt',
|
||||
version='0.0.1',
|
||||
description='textual prompt example',
|
||||
long_description=long_description,
|
||||
long_description_content_type='text/markdown',
|
||||
author='Shengding Hu',
|
||||
license='MIT License',
|
||||
packages=setuptools.find_packages(
|
||||
exclude=['docs', 'tests', 'scripts']),
|
||||
dependency_links=[
|
||||
'https://download.pytorch.org/whl/torch_stable.html',
|
||||
],
|
||||
classifiers=[
|
||||
'Intended Audience :: Developers',
|
||||
'Intended Audience :: Science/Research',
|
||||
'License :: OSI Approved :: MIT License',
|
||||
'Topic :: Scientific/Engineering :: Artificial Intelligence',
|
||||
'Programming Language :: Python :: 3',
|
||||
'Programming Language :: Python :: 3.7.10',
|
||||
],
|
||||
keywords='text nlp machinelearning',
|
||||
# ext_modules=[
|
||||
# CUDAExtension('seq2seq.projections.fwh_cuda',
|
||||
# sources=[
|
||||
# 'seq2seq/projections/fwh_cuda/fwh_cpp.cpp',
|
||||
# 'seq2seq/projections/fwh_cuda/fwh_cu.cu',
|
||||
# ]
|
||||
# )
|
||||
# ]
|
||||
# ,
|
||||
cmdclass={"build_ext": BuildExtension},
|
||||
install_requires=[
|
||||
],
|
||||
)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
setup_package()
|
|
@ -0,0 +1,28 @@
|
|||
#!/bin/bash
|
||||
|
||||
# The MIT License (MIT)
|
||||
# Copyright (c) 2013 Alvin Abad
|
||||
|
||||
if [ $# -eq 0 ]; then
|
||||
echo "Git wrapper script that can specify an ssh-key file
|
||||
Usage:
|
||||
git.sh -i ssh-key-file git-command
|
||||
"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# remove temporary file on exit
|
||||
trap 'rm -f /tmp/.git_ssh.$$' 0
|
||||
|
||||
if [ "$1" = "-i" ]; then
|
||||
SSH_KEY=$2; shift; shift
|
||||
echo "ssh -i $SSH_KEY \$@" > /tmp/.git_ssh.$$
|
||||
chmod +x /tmp/.git_ssh.$$
|
||||
export GIT_SSH=/tmp/.git_ssh.$$
|
||||
fi
|
||||
|
||||
# in case the git command is repeated
|
||||
[ "$1" = "git" ] && shift
|
||||
|
||||
# Run the git command
|
||||
git "$@"
|
|
@ -1,5 +1,5 @@
|
|||
|
||||
__version__ = "0.1.0"
|
||||
__version__ = "0.2.4"
|
||||
|
||||
class GlobalSetting:
|
||||
def __init__(self):
|
||||
|
|
|
@ -2,11 +2,9 @@ from copy import deepcopy
|
|||
from typing import Any, Dict, OrderedDict
|
||||
from opendelta.utils.visualization import Visualization
|
||||
import torch.nn as nn
|
||||
from transformers.file_utils import PushToHubMixin
|
||||
from opendelta.utils.logging import get_logger
|
||||
import importlib
|
||||
from opendelta.delta_configs import BaseDeltaConfig
|
||||
from opendelta.basemodel import DeltaBase
|
||||
logger = get_logger(__name__)
|
||||
|
||||
|
||||
|
@ -114,7 +112,7 @@ class AutoDeltaConfig:
|
|||
|
||||
|
||||
@classmethod
|
||||
def from_finetuned(cls, finetuned_model_path, **kwargs):
|
||||
def from_finetuned(cls, finetuned_delta_path, **kwargs):
|
||||
r"""
|
||||
Instantiate one of the configuration classes of the library from a finetuned delta model configuration.
|
||||
The configuration class to instantiate is selected based on the ``delta_type`` property of the config object that
|
||||
|
@ -122,7 +120,7 @@ class AutoDeltaConfig:
|
|||
|
||||
Parameters:
|
||||
|
||||
finetuned_model_path (:obj:`str` or :obj:`os.PathLike`, *optional*):
|
||||
finetuned_delta_path (:obj:`str` or :obj:`os.PathLike`, *optional*):
|
||||
Can be either:
|
||||
|
||||
- A string, the *model id* of a finetuned delta model configuration hosted inside a model repo on
|
||||
|
@ -173,20 +171,19 @@ class AutoDeltaConfig:
|
|||
|
||||
"""
|
||||
|
||||
kwargs["name_or_path"] = finetuned_model_path
|
||||
|
||||
config_dict, _ = BaseDeltaConfig.get_config_dict(finetuned_model_path, **kwargs)
|
||||
config_dict, kwargs = BaseDeltaConfig.get_config_dict(finetuned_delta_path, **kwargs)
|
||||
if "delta_type" in config_dict:
|
||||
config_class = LAZY_CONFIG_MAPPING[config_dict["delta_type"]]
|
||||
return config_class.from_dict(config_dict, **kwargs)
|
||||
else:
|
||||
# Fallback: use pattern matching on the string.
|
||||
for pattern, config_class in LAZY_CONFIG_MAPPING.items():
|
||||
if pattern in str(finetuned_model):
|
||||
if pattern in str(finetuned_delta_path):
|
||||
return config_class.from_dict(config_dict, **kwargs)
|
||||
|
||||
raise ValueError(
|
||||
f"Unrecognized model in {finetuned_model_path}. "
|
||||
f"Unrecognized model in {finetuned_delta_path}. "
|
||||
f"Should have a `delta_type` key in the loaded config, or contain one of the following strings "
|
||||
f"in its name: {', '.join(LAZY_CONFIG_MAPPING.keys())}"
|
||||
)
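A minimal usage sketch for the renamed argument; the checkpoint id below is the one used elsewhere in this diff and is assumed to be available locally or on the DeltaCenter.

```python
from opendelta import AutoDeltaConfig

# The concrete config class is selected from the `delta_type` key of the loaded config.
delta_config = AutoDeltaConfig.from_finetuned(
    "thunlp/t5-base_adapter_superglue-cb_20220701171436c80"
)
print(type(delta_config).__name__)  # e.g. AdapterConfig
```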
|
||||
|
@ -355,14 +352,14 @@ class AutoDeltaModel:
|
|||
)
|
||||
|
||||
@classmethod
|
||||
def from_finetuned(cls, finetuned_model_path, backbone_model, *model_args, **kwargs):
|
||||
def from_finetuned(cls, finetuned_delta_path, backbone_model, *model_args, **kwargs):
|
||||
r""" Automatically instantiated a delta model and load the finetuned checkpoints based on the
|
||||
:obj:`finetuned_model_path`, which can either be a string pointing to a local path or a url pointint to
|
||||
:obj:`finetuned_delta_path`, which can either be a string pointing to a local path or a URL pointing to
|
||||
the delta hub. It will check the hash after loading the delta model to see whether the correct backbone and
|
||||
delta checkpoint are used.
|
||||
|
||||
Args:
|
||||
finetuned_model_path (:obj:`str` or :obj:`os.PathLike`, *optional*):
|
||||
finetuned_delta_path (:obj:`str` or :obj:`os.PathLike`, *optional*):
|
||||
Can be either:
|
||||
|
||||
- A string, the *model id* of a finetuned delta model configuration hosted inside a model repo on
|
||||
|
@ -377,6 +374,7 @@ class AutoDeltaModel:
|
|||
|
||||
backbone_model (:obj:`nn.Module`): The backbone model to be modified.
|
||||
model_args: Other arguments for initializing the model.
|
||||
kwargs: Other kwargs that will be passed into DeltaBase.from_finetuned.
|
||||
|
||||
Example:
|
||||
|
||||
|
@ -385,15 +383,15 @@ class AutoDeltaModel:
|
|||
delta_model = AutoDeltaModel.from_finetuned("DeltaHub/lora_t5-base-mrpc", backbone_model)
|
||||
|
||||
"""
|
||||
config = kwargs.pop("config", None)
|
||||
delta_config = kwargs.pop("delta_config", None)
|
||||
|
||||
if not isinstance(config, BaseDeltaConfig):
|
||||
config, kwargs = AutoDeltaConfig.from_finetuned(
|
||||
finetuned_model_path, return_unused_kwargs=True, **kwargs
|
||||
if not isinstance(delta_config, BaseDeltaConfig):
|
||||
delta_config, kwargs = AutoDeltaConfig.from_finetuned(
|
||||
finetuned_delta_path, return_unused_kwargs=True, **kwargs
|
||||
)
|
||||
if type(config) in cls._delta_model_mapping.keys():
|
||||
model_class = cls._delta_model_mapping[type(config)]
|
||||
return model_class.from_finetuned(finetuned_model_path, backbone_model, *model_args, **kwargs)
|
||||
if type(delta_config) in cls._delta_model_mapping.keys():
|
||||
model_class = cls._delta_model_mapping[type(delta_config)]
|
||||
return model_class.from_finetuned(finetuned_delta_path, backbone_model, *model_args, delta_config=delta_config, **kwargs)
|
||||
raise ValueError(
|
||||
f"Unrecognized configuration class {config.__class__} for this kind of AutoModel: {cls.__name__}.\n"
|
||||
f"Model type should be one of {', '.join(c.__name__ for c in cls._model_mapping.keys())}."
|
||||
|
|
|
@ -5,15 +5,6 @@ from opendelta import __version__ as opendelta_version
|
|||
from opendelta.utils import logging
|
||||
from opendelta.utils.signature import get_arg_names, get_arg_names_inside_func
|
||||
import transformers
|
||||
from transformers.file_utils import (
|
||||
PushToHubMixin,
|
||||
is_offline_mode,
|
||||
cached_path,
|
||||
is_remote_url,
|
||||
get_list_of_files,
|
||||
hf_bucket_url,
|
||||
)
|
||||
from packaging import version
|
||||
import json
|
||||
import copy
|
||||
|
||||
|
@ -26,7 +17,7 @@ logger = logging.get_logger(__name__)
|
|||
FULL_CONFIGURATION_FILE = "config.json"
|
||||
_re_configuration_file = re.compile(r"config\.(.*)\.json")
|
||||
|
||||
class BaseDeltaConfig(PushToHubMixin):
|
||||
class BaseDeltaConfig:
|
||||
r"""Base class for all configuration classes. Handles a few
|
||||
parameters common to all delta models' configurations as well as methods for loading/downloading/saving configurations.
|
||||
|
||||
|
@ -108,7 +99,7 @@ class BaseDeltaConfig(PushToHubMixin):
|
|||
|
||||
|
||||
@classmethod
|
||||
def from_finetuned(cls, finetuned_model_path: Union[str, os.PathLike], **kwargs) -> "BaseDeltaConfig":
|
||||
def from_finetuned(cls, finetuned_delta_path: Union[str, os.PathLike], **kwargs) -> "BaseDeltaConfig":
|
||||
r"""
|
||||
Instantiate a :obj:`BaseDeltaConfig` (or a derived class) from a finetuned delta module configuration.
|
||||
|
||||
|
@ -132,7 +123,7 @@ class BaseDeltaConfig(PushToHubMixin):
|
|||
delta_config = LoraConfig.from_finetuned("DeltaHub/lora_t5-base_mrpc")
|
||||
|
||||
"""
|
||||
config_dict, kwargs = cls.get_config_dict(finetuned_model_path, **kwargs)
|
||||
config_dict, kwargs = cls.get_config_dict(finetuned_delta_path, **kwargs)
|
||||
if "model_type" in config_dict and hasattr(cls, "model_type") and config_dict["model_type"] != cls.model_type:
|
||||
logger.warn(
|
||||
f"You are using a model of type {config_dict['model_type']} to instantiate a model of type "
|
||||
|
@ -202,6 +193,7 @@ class BaseDeltaConfig(PushToHubMixin):
|
|||
config_dict.pop(config_key)
|
||||
unused_config_keys.append(config_key)
|
||||
logger.warning(f"The following keys are not used by {cls}.__init__ function: {unused_config_keys}")
|
||||
|
||||
config = cls(**config_dict)
|
||||
|
||||
|
||||
|
@ -215,7 +207,7 @@ class BaseDeltaConfig(PushToHubMixin):
|
|||
to_remove.append(key)
|
||||
for key in to_remove:
|
||||
kwargs.pop(key, None)
|
||||
logger.info(f"Model config {config}")
|
||||
logger.info(f"Model config\n{config}")
|
||||
|
||||
if return_unused_kwargs:
|
||||
return config, kwargs
|
||||
|
@ -224,101 +216,58 @@ class BaseDeltaConfig(PushToHubMixin):
|
|||
|
||||
@classmethod
|
||||
def get_config_dict(
|
||||
cls, pretrained_model_name_or_path: Union[str, os.PathLike], **kwargs
|
||||
cls, finetuned_delta_path: Union[str, os.PathLike], **kwargs
|
||||
) -> Tuple[Dict[str, Any], Dict[str, Any]]:
|
||||
"""[NODOC]
|
||||
From a ``pretrained_model_name_or_path``, resolve to a dictionary of parameters, to be used for instantiating a
|
||||
From a ``finetuned_delta_path``, resolve to a dictionary of parameters, to be used for instantiating a
|
||||
[``PretrainedConfig``] using ``from_dict``.
|
||||
Parameters:
|
||||
pretrained_model_name_or_path (:obj:`str` or :obj:`os.PathLike`):
|
||||
finetuned_delta_path (:obj:`str` or :obj:`os.PathLike`):
|
||||
The identifier of the pre-trained checkpoint from which we want the dictionary of parameters.
|
||||
Returns:
|
||||
:obj:`Tuple[Dict, Dict]`: The dictionary(ies) that will be used to instantiate the configuration object.
|
||||
"""
|
||||
cache_dir = kwargs.pop("cache_dir", None)
|
||||
force_download = kwargs.pop("force_download", False)
|
||||
resume_download = kwargs.pop("resume_download", False)
|
||||
proxies = kwargs.pop("proxies", None)
|
||||
use_auth_token = kwargs.pop("use_auth_token", None)
|
||||
local_files_only = kwargs.pop("local_files_only", False)
|
||||
revision = kwargs.pop("revision", None)
|
||||
cache_dir = kwargs.get("cache_dir", None)
|
||||
force_download = kwargs.get("force_download", False)
|
||||
# resume_download = kwargs.pop("resume_download", False)
|
||||
# proxies = kwargs.pop("proxies", None)
|
||||
# use_auth_token = kwargs.pop("use_auth_token", None)
|
||||
local_files_only = kwargs.get("local_files_only", False)
|
||||
# revision = kwargs.pop("revision", None)
|
||||
# from_pipeline = kwargs.pop("_from_pipeline", None)
|
||||
from_auto_class = kwargs.pop("_from_auto", False)
|
||||
# from_auto_class = kwargs.pop("_from_auto", False)
|
||||
|
||||
user_agent = {"file_type": "config", "from_auto_class": from_auto_class}
|
||||
# user_agent = {"file_type": "config", "from_auto_class": from_auto_class}
|
||||
# if from_pipeline is not None:
|
||||
# user_agent["using_pipeline"] = from_pipeline
|
||||
|
||||
if is_offline_mode() and not local_files_only:
|
||||
logger.info("Offline mode: forcing local_files_only=True")
|
||||
if os.environ.get("DELTACENTER_OFFLINE", '0') == '1':
|
||||
logger.info("Delta Center offline mode!")
|
||||
local_files_only = True
|
||||
|
||||
pretrained_model_name_or_path = str(pretrained_model_name_or_path)
|
||||
finetuned_delta_path = str(finetuned_delta_path)
|
||||
|
||||
|
||||
if os.path.isfile(pretrained_model_name_or_path) or is_remote_url(pretrained_model_name_or_path):
|
||||
config_file = pretrained_model_name_or_path
|
||||
if cache_dir is not None:
|
||||
cached_finetuned_delta_path = os.path.join(cache_dir, finetuned_delta_path)
|
||||
else:
|
||||
# configuration_file = get_configuration_file(
|
||||
# pretrained_model_name_or_path,
|
||||
# revision=revision,
|
||||
# use_auth_token=use_auth_token,
|
||||
# local_files_only=local_files_only,
|
||||
# )
|
||||
print("cache_dir", cache_dir, "|||", "pretrained_model_name_or_path", pretrained_model_name_or_path)
|
||||
cached_finetuned_delta_path = finetuned_delta_path
|
||||
|
||||
if os.path.isdir(pretrained_model_name_or_path):
|
||||
config_file = os.path.join(pretrained_model_name_or_path, "config.json")
|
||||
elif os.path.isdir(os.path.join(cache_dir, pretrained_model_name_or_path)):
|
||||
config_file = os.path.join(cache_dir, pretrained_model_name_or_path, "config.json")
|
||||
else:
|
||||
config_file = hf_bucket_url(
|
||||
pretrained_model_name_or_path, filename=configuration_file, revision=revision, mirror=None
|
||||
)
|
||||
if os.path.isfile(cached_finetuned_delta_path):
|
||||
local_files_only = True
|
||||
elif os.path.isdir(cached_finetuned_delta_path):
|
||||
# cached_finetuned_delta_path = os.path.join(cached_finetuned_delta_path, 'config.json')
|
||||
local_files_only = True
|
||||
|
||||
print("config file!!", config_file)
|
||||
try:
|
||||
# Load from URL or cache if already cached
|
||||
resolved_config_file = cached_path(
|
||||
config_file,
|
||||
cache_dir=cache_dir,
|
||||
force_download=force_download,
|
||||
proxies=proxies,
|
||||
resume_download=resume_download,
|
||||
local_files_only=local_files_only,
|
||||
use_auth_token=use_auth_token,
|
||||
user_agent=user_agent,
|
||||
)
|
||||
# Load config dict
|
||||
config_dict = cls._dict_from_json_file(resolved_config_file)
|
||||
|
||||
except EnvironmentError as err:
|
||||
logger.error(err)
|
||||
msg = (
|
||||
f"Can't load config for '{pretrained_model_name_or_path}'. Make sure that:\n\n"
|
||||
f"- '{pretrained_model_name_or_path}' is a correct model identifier listed on 'https://huggingface.co/models'\n"
|
||||
f" (make sure '{pretrained_model_name_or_path}' is not a path to a local directory with something else, in that case)\n\n"
|
||||
f"- or '{pretrained_model_name_or_path}' is the correct path to a directory containing a {CONFIG_NAME} file\n\n"
|
||||
)
|
||||
|
||||
if revision is not None:
|
||||
msg += f"- or '{revision}' is a valid git identifier (branch name, a tag name, or a commit id) that exists for this model name as listed on its model page on 'https://huggingface.co/models'\n\n"
|
||||
|
||||
raise EnvironmentError(msg)
|
||||
|
||||
except (json.JSONDecodeError, UnicodeDecodeError):
|
||||
msg = (
|
||||
f"Couldn't reach server at '{config_file}' to download configuration file or "
|
||||
"configuration file is not a valid JSON file. "
|
||||
f"Please check network or file content here: {resolved_config_file}."
|
||||
)
|
||||
raise EnvironmentError(msg)
|
||||
|
||||
if resolved_config_file == config_file:
|
||||
logger.info(f"loading configuration file {config_file}")
|
||||
else:
|
||||
logger.info(f"loading configuration file {config_file} from cache at {resolved_config_file}")
|
||||
# if local_files_only:
|
||||
# config_dict = cls._dict_from_json_file(cached_finetuned_delta_path)
|
||||
if not local_files_only or force_download:
|
||||
from .utils.delta_center import download as dcdownload
|
||||
# try to download from DeltaCenter
|
||||
cached_finetuned_delta_path = dcdownload(finetuned_delta_path, force_download=force_download, cache_dir=cache_dir)
|
||||
kwargs['force_download'] = False # already downloaded; no need to force again
|
||||
|
||||
cached_finetuned_delta_path = os.path.join(cached_finetuned_delta_path, 'config.json')
|
||||
config_dict = cls._dict_from_json_file(cached_finetuned_delta_path)
|
||||
return config_dict, kwargs
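A minimal sketch of the offline path through the logic above; ``DELTACENTER_OFFLINE`` and ``cache_dir`` are the knobs shown in the code, while the checkpoint id and cache directory are illustrative.

```python
import os
from opendelta import AutoDeltaConfig

# Force the offline branch: the config is read from an existing local copy
# (either <cache_dir>/<id> or the path itself) instead of the DeltaCenter.
os.environ["DELTACENTER_OFFLINE"] = "1"
delta_config = AutoDeltaConfig.from_finetuned(
    "thunlp/t5-base_adapter_superglue-cb_20220701171436c80",
    cache_dir="./delta_checkpoints/",
)
```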
|
||||
|
||||
@classmethod
|
||||
|
@ -432,53 +381,6 @@ class BaseDeltaConfig(PushToHubMixin):
|
|||
|
||||
|
||||
|
||||
def get_configuration_file(
|
||||
path_or_repo: Union[str, os.PathLike],
|
||||
revision: Optional[str] = None,
|
||||
use_auth_token: Optional[Union[bool, str]] = None,
|
||||
local_files_only: bool = False,
|
||||
) -> str:
|
||||
"""
|
||||
Get the configuration file to use for this version of transformers.
|
||||
Args:
|
||||
path_or_repo (`:obj:str` or `:obj:os.PathLike`):
|
||||
Can be either the id of a repo on huggingface.co or a path to a *directory*.
|
||||
revision(`:obj:str`, *optional*, defaults to ``"main"``):
|
||||
The specific model version to use. It can be a branch name, a tag name, or a commit id, since we use a
|
||||
git-based system for storing models and other artifacts on huggingface.co, so ``revision`` can be any
|
||||
identifier allowed by git.
|
||||
use_auth_token (:obj:`str` or *bool*, *optional*):
|
||||
The token to use as HTTP bearer authorization for remote files. If :obj:`True`, will use the token generated
|
||||
when running ``transformers-cli login`` (stored in ``~/.huggingface``).
|
||||
local_files_only (:obj:`bool`, *optional*, defaults to :obj:`False`):
|
||||
Whether or not to only rely on local files and not to attempt to download any files.
|
||||
Returns:
|
||||
:obj:`str`: The configuration file to use.
|
||||
"""
|
||||
# Inspect all files from the repo/folder.
|
||||
all_files = get_list_of_files(
|
||||
path_or_repo, revision=revision, use_auth_token=use_auth_token, local_files_only=local_files_only
|
||||
)
|
||||
configuration_files_map = {}
|
||||
for file_name in all_files:
|
||||
search = _re_configuration_file.search(file_name)
|
||||
if search is not None:
|
||||
v = search.groups()[0]
|
||||
configuration_files_map[v] = os.path.split(file_name)[-1]
|
||||
available_versions = sorted(configuration_files_map.keys())
|
||||
# Defaults to FULL_CONFIGURATION_FILE and then try to look at some newer versions.
|
||||
configuration_file = FULL_CONFIGURATION_FILE
|
||||
# transformers_version_ = version.parse(transformers_version)
|
||||
for v in available_versions:
|
||||
# if version.parse(v) <= transformers_version_:
|
||||
configuration_file = configuration_files_map[v]
|
||||
# else:
|
||||
# # No point going further since the versions are sorted.
|
||||
# break
|
||||
|
||||
return configuration_file
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
myconfig = BaseDeltaConfig.from_pretrained("../ckpts/lora/")
|
||||
myconfig.save_pretrained("../ckpts/lora.1/")
|
||||
|
|
|
@ -22,10 +22,18 @@ class InterFaceMixin:
|
|||
self._reverse_axis_order = np.argsort(self._axis_order).tolist()
|
||||
|
||||
def _transpose(self, tensor):
|
||||
return tensor.permute(*self._axis_order)
|
||||
if tensor.dim() == 3:
|
||||
return tensor.permute(*self._axis_order)
|
||||
else:
|
||||
return tensor
|
||||
|
||||
|
||||
|
||||
def _reverse_transpose(self, tensor):
|
||||
return tensor.permute(*self._reverse_axis_order).contiguous()
|
||||
if tensor.dim() == 3:
|
||||
return tensor.permute(*self._reverse_axis_order).contiguous()
|
||||
else:
|
||||
return tensor
|
||||
|
||||
def _convert_data_type(self, tensor):
|
||||
self._data_type_record = tensor.dtype
|
||||
|
@ -37,6 +45,8 @@ class InterFaceMixin:
|
|||
|
||||
|
||||
|
||||
|
||||
|
||||
class AdapterLayer(nn.Module, InterFaceMixin):
|
||||
r"""A layer of adapter tuning module.
|
||||
"""
|
||||
|
|
|
@ -1,103 +1,10 @@
|
|||
|
||||
# from dataclasses import dataclass, field, fields
|
||||
# from tkinter.messagebox import NO
|
||||
# from typing import Optional, List, Union
|
||||
# from xml.sax import default_parser_list
|
||||
# from opendelta.utils.logging import get_logger
|
||||
|
||||
# logger = get_logger(__name__)
|
||||
from DeltaCenter import OssClient
|
||||
from .file_utils import default_cache_path
|
||||
|
||||
|
||||
# alternative_names = {
|
||||
# "train_tasks": ["train_tasks", "train_task", "task_name"],
|
||||
# }
|
||||
def download(finetuned_delta_path, cache_dir=None, force_download=False):
|
||||
if cache_dir is None:
|
||||
cache_dir = default_cache_path
|
||||
path_to_unzip_file = OssClient.download(finetuned_delta_path, dest=cache_dir, force_download=force_download)
|
||||
return path_to_unzip_file
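A minimal usage sketch of the helper above; the checkpoint id is taken from the README, and the returned value is the directory containing the unzipped checkpoint.

```python
from opendelta.utils.delta_center import download

# Downloads (or reuses) the checkpoint under ~/.cache/delta_center/ when cache_dir is None.
local_dir = download(
    "thunlp/t5-base_adapter_superglue-cb_20220701171436c80",
    cache_dir=None,
    force_download=False,
)
print(local_dir)  # directory that contains config.json and pytorch_model.bin
```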
|
||||
|
||||
|
||||
# @dataclass
|
||||
# class DeltaCenterArguments:
|
||||
# """
|
||||
# The arguments that are used to distinguish between different delta models on the DeltaCenter
|
||||
# """
|
||||
# name: str = field(default="",
|
||||
# metadata={"help": "The name of the delta model checkpoint"}
|
||||
# )
|
||||
# backbone_model: str = field(default="",
|
||||
# metadata={"help": "The backbone model of the delta model"}
|
||||
# )
|
||||
# model_name_or_path: str = field(
|
||||
# default = None,
|
||||
# metadata={"help": "Path to pretrained model or model identifier from huggingface.co/models"}
|
||||
# )
|
||||
# model_revision: str = field(
|
||||
# default="main",
|
||||
# metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."},
|
||||
# )
|
||||
# delta_type: str = field(
|
||||
# default=None,
|
||||
# metadata={"help": "the type of type model, e.g., adapter, lora, etc."}
|
||||
# )
|
||||
# train_tasks: Optional[Union[List[str], str]]= field(
|
||||
# default=None,
|
||||
# metadata={"help": "the task(s) that the delta is trained on"}
|
||||
# )
|
||||
# checkpoint_size: Optional[float] = field(
|
||||
# default=None,
|
||||
# metadata={"help": "the size of the checkpoint, in MB"}
|
||||
# )
|
||||
# test_tasks: Optional[Union[List[str], str]] = field(
|
||||
# default=None,
|
||||
# metadata={"help": "the task(s) that the delta is tested on"}
|
||||
# )
|
||||
# test_performance: Optional[float] = field(
|
||||
# default=None,
|
||||
# metadata={"help": "the performance of the model on the test set"}
|
||||
# )
|
||||
# trainable_ratio: Optional[float] = field(
|
||||
# default=None,
|
||||
# metadata={"help": "the ratio of trainable parameters in the model"}
|
||||
# )
|
||||
# delta_ratio: Optional[float] = field(
|
||||
# default=None,
|
||||
# metadata={"help": "the ratio of delta parameters in the model"}
|
||||
# )
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
# def create_repo_name(prefix="", center_args=None, **kwargs
|
||||
# ):
|
||||
# r"""Currently, it's only a simple concatenation of the arguments.
|
||||
# """
|
||||
# if center_args is None:
|
||||
# center_args = create_delta_center_args(**kwargs)
|
||||
# reponame = prefix+"_"
|
||||
# reponame += center_args.model_name_or_path.split()[-1]+"_" if center_args.model_name_or_path is not None else ""
|
||||
# reponame += center_args.delta_type+"_" if center_args.delta_type is not None else ""
|
||||
|
||||
# # tasks
|
||||
|
||||
# if isinstance(center_args.train_tasks, list):
|
||||
# train_tasks = "+".join(center_args.train_tasks)
|
||||
# elif center_args.train_tasks is not None:
|
||||
# train_tasks = center_args.train_tasks
|
||||
# else:
|
||||
# logger.warning("train_tasks are not find in all arguments. Do you miss them?")
|
||||
# train_tasks = None
|
||||
# reponame += train_tasks+"_" if train_tasks is not None else ""
|
||||
# reponame = reponame.strip("_")
|
||||
# return reponame
|
||||
|
||||
# def create_delta_center_args(**kwargs):
|
||||
# mdict = {}
|
||||
# field = fields(DeltaCenterArguments)
|
||||
# for f in field:
|
||||
# if f.name in kwargs:
|
||||
# mdict[f.name] = kwargs[f.name]
|
||||
# else:
|
||||
# for altername in alternative_names[f.name]:
|
||||
# if altername in kwargs:
|
||||
# mdict[f.name] = kwargs[altername]
|
||||
# break
|
||||
# center_args = DeltaCenterArguments(**mdict)
|
||||
# return center_args
|
|
@ -0,0 +1,3 @@
|
|||
import os
|
||||
default_cache_path = "{}/.cache/delta_center/".format(os.path.expanduser('~'))
|
||||
WEIGHTS_NAME = 'pytorch_model.bin'
|
|
@ -1,7 +1,4 @@
|
|||
|
||||
from io import RawIOBase
|
||||
import re
|
||||
from tarfile import HeaderError
|
||||
from typing import Dict, List, Union, Optional, Callable
|
||||
from opendelta.delta_configs import BaseDeltaConfig
|
||||
from opendelta.utils.model_md5 import gen_model_hash, gen_parameter_hash
|
||||
|
@ -9,27 +6,16 @@ import torch
|
|||
import os
|
||||
from opendelta import logging
|
||||
import torch.nn as nn
|
||||
from transformers.file_utils import (
|
||||
WEIGHTS_NAME,
|
||||
PushToHubMixin,
|
||||
is_offline_mode,
|
||||
is_remote_url,
|
||||
hf_bucket_url,
|
||||
cached_path,
|
||||
)
|
||||
from transformers.utils.dummy_pt_objects import PreTrainedModel
|
||||
import hashlib
|
||||
try:
|
||||
from DeltaCenter import OssClient
|
||||
except:
|
||||
pass
|
||||
from DeltaCenter import OssClient
|
||||
import yaml
|
||||
from dataclasses import dataclass, field, fields
|
||||
import datetime
|
||||
from .file_utils import WEIGHTS_NAME
|
||||
|
||||
logger = logging.get_logger(__name__)
|
||||
|
||||
|
||||
|
||||
alternative_names = {
|
||||
"train_tasks": ["train_tasks", "train_task", "task_name"],
|
||||
}
|
||||
|
@ -46,14 +32,10 @@ class DeltaCenterArguments:
|
|||
backbone_model: str = field(default="",
|
||||
metadata={"help": "The backbone model of the delta model"}
|
||||
)
|
||||
model_path_public: str = field(
|
||||
backbone_model_path_public: str = field(
|
||||
default = None,
|
||||
metadata={"help": "Publicly available path (url) to pretrained model or model identifier from huggingface.co/models"}
|
||||
)
|
||||
model_revision: str = field(
|
||||
default="main",
|
||||
metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."},
|
||||
)
|
||||
delta_type: str = field(
|
||||
default=None,
|
||||
metadata={"help": "the type of type model, e.g., adapter, lora, etc."}
|
||||
|
@ -62,6 +44,10 @@ class DeltaCenterArguments:
|
|||
default=None,
|
||||
metadata={"help": "the task(s) that the delta is trained on"}
|
||||
)
|
||||
train_datasets: Optional[Union[List[str], str]]= field(
|
||||
default=None,
|
||||
metadata={"help": "the datasets(s) that the delta is trained on"}
|
||||
)
|
||||
checkpoint_size: Optional[float] = field(
|
||||
default=None,
|
||||
metadata={"help": "the size of the checkpoint, in MB"}
|
||||
|
@ -70,6 +56,10 @@ class DeltaCenterArguments:
|
|||
default=None,
|
||||
metadata={"help": "the task(s) that the delta is tested on"}
|
||||
)
|
||||
test_datasets: Optional[Union[List[str], str]] = field(
|
||||
default=None,
|
||||
metadata={"help": "the dataset(s) that the delta is tested on"}
|
||||
)
|
||||
test_performance: Optional[float] = field(
|
||||
default=None,
|
||||
metadata={"help": "the performance of the model on the test set"}
|
||||
|
@ -86,10 +76,18 @@ class DeltaCenterArguments:
|
|||
default=None,
|
||||
metadata={"help": "the ratio of delta parameters in the model"}
|
||||
)
|
||||
usage: Optional[str] = field(
|
||||
default="",
|
||||
metadata={"help": "the usage code of the model"}
|
||||
)
|
||||
license: Optional[str] = field(
|
||||
default="apache-2.0",
|
||||
metadata={"help": "the license of the model"}
|
||||
)
|
||||
|
||||
|
||||
|
||||
class SaveLoadMixin(PushToHubMixin):
|
||||
class SaveLoadMixin:
|
||||
def add_configs_when_saving(self,):
|
||||
self.config.backbone_class = self.backbone_model.__class__.__name__
|
||||
self.config.backbone_checkpoint_name = os.path.split(self.backbone_model.config._name_or_path.strip("/"))[-1]
|
||||
|
@ -105,51 +103,53 @@ class SaveLoadMixin(PushToHubMixin):
|
|||
state_dict: Optional[dict] = None,
|
||||
save_function: Callable = torch.save,
|
||||
push_to_dc: bool = True,
|
||||
center_args: Optional[Union[DeltaCenterArguments, dict]] = None,
|
||||
center_args_pool: Optional[dict] = None,
|
||||
list_tags: Optional[List] = None,
|
||||
dict_tags: Optional[Dict] = None,
|
||||
center_args: Optional[Union[DeltaCenterArguments, dict]] = dict(),
|
||||
center_args_pool: Optional[dict] = dict(),
|
||||
list_tags: Optional[List] = list(),
|
||||
dict_tags: Optional[Dict] = dict(),
|
||||
delay_push: bool = False,
|
||||
test_result = None
|
||||
test_result = None,
|
||||
usage: Optional[str] = "",
|
||||
):
|
||||
r"""
|
||||
Save a model and its configuration file to a directory, so that it can be re-loaded using the
|
||||
:py:meth:`~DeltaBase.from_finetuned` class method.
|
||||
:py:meth:`~DeltaBase.save_finetuned` class method.
|
||||
|
||||
Arguments:
|
||||
save_directory (:obj:`str` or :obj:`os.PathLike`):
|
||||
Directory to which to save. Will be created if it doesn't exist.
|
||||
save_config (:obj:`bool`, *optional*, defaults to :obj:`True`):
|
||||
Whether or not to save the config of the model. Useful when in distributed training like TPUs and need
|
||||
to call this function on all processes. In this case, set ``save_config=True`` only on the main process
|
||||
to avoid race conditions.
|
||||
state_dict (nested dictionary of :obj:`torch.Tensor`):
|
||||
The state dictionary of the model to save. Will default to ``self.state_dict()``, but can be used to only
|
||||
save parts of the model or if special precautions need to be taken when recovering the state dictionary
|
||||
of a model (like when using model parallelism).
|
||||
save_function (:obj:`Callable`):
|
||||
The function to use to save the state dictionary. Useful on distributed training like TPUs when one
|
||||
need to replace ``torch.save`` by another method.
|
||||
push_to_dc (:obj:`bool`, *optional*, defaults to :obj:`True`): Whether or not to push the model to the DeltaCenter.
|
||||
center_args (:obj:`Union[DeltaCenterArguments, dict]`, *optional*, defaults to :obj:`None`): The arguments
|
||||
that are used to distinguish between different delta models on the DeltaCenter. It has higher priority than the `center_args_pool`.
|
||||
It will be used to group delta models.
|
||||
center_args_pool (:obj:`dict`, *optional*, defaults to :obj:`None`): The arguments's pool for DeltaCenter
|
||||
Together with center_args, they are are used to distinguish between different delta models on the DeltaCenter.
|
||||
It will be used to group delta models.
|
||||
list_tags (:obj:`List`, *optional*, defaults to :obj:`None`): The tags in the form of list for the delta model, it is the
|
||||
optional identifiers that are not expected by `DeltaCenterArgument`. It will not be used to group delta models in the delta center
|
||||
dict_tags (:obj:`Dict`, *optional*, defaults to :obj:`None`): The tags in the form of dictionary for the delta model, it is the
|
||||
optional identifiers that are not expected by `DeltaCenterArgument`. It will not be used to group delta models in the delta center.
|
||||
delay_push (:obj:`bool`, *optional*, defaults to :obj:`False`): Whether or not to delay the push to the DeltaCenter. When set to True,
|
||||
the delta object will be saved locally to save_directory, you can push it later using
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
python -m DeltaCenter upload save_directory
|
||||
|
||||
finetuned_delta_path: (optional) path to the directory where the model and its configuration file will be saved.
|
||||
If not specified, the model will be saved in the directory ``./delta_checkpoints/``,
|
||||
which is a subdirectory of the current working directory.
|
||||
save_config: (optional) if ``True``, the configuration file will be saved in the same directory as the
|
||||
model file. if ``False``, only the state dict will be saved.
|
||||
state_dict: (optional) a dictionary containing the model's state_dict. If not specified, the
|
||||
state_dict is loaded from the backbone model's trainable parameters.
|
||||
save_function: (optional) the function used to save the model. Defaults to ``torch.save``.
|
||||
state_dict_only: (optional) if ``True``, only the state_dict will be saved.
|
||||
push_to_dc: (optional) if ``True``, the model will prepare everything needed to push to the DeltaCenter.
|
||||
This includes:
|
||||
- creating a configuration file for the model
|
||||
- creating a directory for the model
|
||||
- saving the model's trainable parameters
|
||||
- pushing the model to the DeltaCenter
|
||||
center_args: (optional) the arguments that are used to distinguish between different delta models on the DeltaCenter
|
||||
center_args_pool: (optional) a dictionary containing the arguments that are used to distinguish between different delta models on the DeltaCenter
|
||||
list_tags: (optional) a list of tags that will be added to the model's configuration file
|
||||
dict_tags: (optional) a dictionary of tags that will be added to the model's configuration file
|
||||
delay_push: (optional) if ``True``, the model will not be pushed to the DeltaCenter. This is useful if you want to
|
||||
push the model later.
|
||||
|
||||
"""
|
||||
|
||||
# create the config to save, including model hash, etc.
|
||||
if save_config:
|
||||
if not hasattr(self, "config"):
|
||||
self.create_config_from_model()
|
||||
self.add_configs_when_saving()
|
||||
|
||||
if push_to_dc:
|
||||
final_center_args = self.create_delta_center_args(center_args=center_args,
|
||||
center_args_pool=center_args_pool)
|
||||
|
||||
save_directory = finetuned_delta_path
|
||||
if os.path.isfile(save_directory):
|
||||
logger.error(f"Provided path ({save_directory}) should be a directory, not a file")
|
||||
|
@ -157,62 +157,78 @@ class SaveLoadMixin(PushToHubMixin):
|
|||
|
||||
os.makedirs(save_directory, exist_ok=True)
|
||||
|
||||
if push_to_dc:
|
||||
save_directory = os.path.join(save_directory, final_center_args.name)
|
||||
os.makedirs(save_directory, exist_ok=True)
|
||||
|
||||
model_to_save = self.backbone_model# unwrap_model(self)
|
||||
|
||||
# Save the model
|
||||
if state_dict is None:
|
||||
state_dict = model_to_save.state_dict()
|
||||
|
||||
# Save the config
|
||||
if save_config:
|
||||
if not hasattr(self, "config"):
|
||||
self.create_config_from_model()
|
||||
self.add_configs_when_saving()
|
||||
self.config.save_finetuned(save_directory)
|
||||
|
||||
output_model_file = os.path.join(save_directory, WEIGHTS_NAME)
|
||||
save_function(state_dict, output_model_file)
|
||||
|
||||
logger.info(f"Model weights saved in {output_model_file}")
|
||||
# Save the config
|
||||
if save_config:
|
||||
self.config.save_finetuned(save_directory)
|
||||
|
||||
final_center_args = self.create_delta_center_args(center_args=center_args,
|
||||
center_args_pool=center_args_pool)
|
||||
|
||||
logger.info("\n"+"*"*30+f"\nYou delta models has been saved locally to:\n\t{os.path.abspath(save_directory)}"
|
||||
)
|
||||
|
||||
state_dict_total_params = sum(p.numel() for p in state_dict.values())
|
||||
other_tags={}
|
||||
other_tags.update({'state_dict_total_params(M)':state_dict_total_params/1024/1024})
|
||||
other_tags.update({'test_result':test_result})
|
||||
if push_to_dc:
|
||||
self.create_yml(save_directory, final_center_args, list_tags, dict_tags,other_tags)
|
||||
logger.info("Creating yaml file for delta center")
|
||||
self.create_yml(save_directory, final_center_args, list_tags, dict_tags, other_tags)
|
||||
|
||||
if not delay_push:
|
||||
OssClient.upload(base_dir=save_directory)
|
||||
else:
|
||||
logger.info("\n"+"*"*30+f"\nYou delta models has been saved locally to:\n\t\t{os.path.abspath(save_directory)}\
|
||||
\nyou can push it to the delta center later using \n\t\tpython -m DeltaCenter upload {os.path.abspath(save_directory)}\n"
|
||||
+"*"*30)
|
||||
|
||||
# get absolute path of saved_directory,
|
||||
if not delay_push:
|
||||
OssClient.upload(base_dir=save_directory)
|
||||
else:
|
||||
logger.info(f"Delay push: you can push it to the delta center later using \n\tpython -m DeltaCenter upload {os.path.abspath(save_directory)}\n"
|
||||
+"*"*30)
|
||||
|
||||
|
||||
def create_yml(self, save_dir, config, list_tags=None, dict_tags=None,other_tags=None):
|
||||
|
||||
|
||||
def create_yml(self, save_dir, config, list_tags=list(), dict_tags=dict(),other_tags=None):
|
||||
f = open("{}/config.yml".format(save_dir), 'w')
|
||||
config_dict = vars(config)
|
||||
config_dict['dict_tags'] = dict_tags if dict_tags is not None else {}
|
||||
config_dict['list_tags'] = list_tags if list_tags is not None else []
|
||||
config_dict['dict_tags'] = dict_tags
|
||||
config_dict['list_tags'] = list_tags
|
||||
if other_tags is not None:
|
||||
config_dict.update(other_tags)
|
||||
yaml.safe_dump(config_dict, f)
|
||||
f.close()
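The resulting ``config.yml`` mirrors the ``DeltaCenterArguments`` fields plus the tag entries added above. A minimal sketch for inspecting it; the path is illustrative.

```python
import yaml

# Read back the metadata written by create_yml; keys follow DeltaCenterArguments
# plus 'list_tags', 'dict_tags' and the extra entries added at save time.
with open("./delta_checkpoints/some_saved_delta/config.yml") as f:
    meta = yaml.safe_load(f)
print(meta.get("delta_type"), meta.get("backbone_model"), meta.get("list_tags"))
```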
|
||||
|
||||
def load_checkpoint(self, path, load_func=torch.load, backbone_model=None):
|
||||
r"""Simple method for loading only the checkpoint
|
||||
"""
|
||||
if backbone_model is None:
|
||||
backbone_model = self.backbone_model
|
||||
self.backbone_model.load_state_dict(load_func(f"{path}/{WEIGHTS_NAME}"), strict=False)
|
||||
|
||||
def save_checkpoint(self, path, save_func=torch.save, backbone_model=None):
|
||||
r"""Simple method for saving only the checkpoint"""
|
||||
if backbone_model is None:
|
||||
backbone_model = self.backbone_model
|
||||
save_func(backbone_model.state_dict(), f"{path}/{WEIGHTS_NAME}")
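A minimal usage sketch for the two helpers above; the directory is illustrative, and only ``pytorch_model.bin`` is written and read.

```python
# Save only the backbone's state dict, then load it back non-strictly.
delta_model.save_checkpoint("./ckpt_dir")
delta_model.load_checkpoint("./ckpt_dir")
```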
|
||||
|
||||
@classmethod
|
||||
def from_finetuned(cls,
|
||||
finetuned_delta_path: Optional[Union[str, os.PathLike]],
|
||||
backbone_model: nn.Module,
|
||||
delta_config = None,
|
||||
cache_dir: Optional[Union[str, os.PathLike]] = None,
|
||||
state_dict: Optional[dict] = None,
|
||||
*model_args,
|
||||
force_download: Optional[bool] = False,
|
||||
check_hash: Optional[bool] = True,
|
||||
local_files_only: Optional[bool] = False,
|
||||
**kwargs):
|
||||
r"""
|
||||
Instantiate a finetuned delta model from a path.
|
||||
|
@ -220,250 +236,76 @@ class SaveLoadMixin(PushToHubMixin):
|
|||
To further train the model, you can use the :meth:`freeze_module <opendelta.basemodel.DeltaBase.freeze_module>` method.

Parameters:

finetuned_model_path (:obj:`str` or :obj:`os.PathLike`, *optional*):
Can be either:

- A string, the *model id* of a pretrained model hosted inside a model repo on huggingface.co.
Valid model ids can be located at the root-level, like ``bert-base-uncased``, or namespaced under a
user or organization name, like ``dbmdz/bert-base-german-cased``.
- A path to a *directory* containing model weights saved using
:meth:`SaveLoadMixin.save_finetuned`, e.g., ``./my_model_directory/``.
- A path or url to a *tensorflow index checkpoint file* (e.g, ``./tf_model/model.ckpt.index``). In
this case, ``from_tf`` should be set to ``True`` and a configuration object should be provided as
``config`` argument. This loading path is slower than converting the TensorFlow checkpoint in a
PyTorch model using the provided conversion scripts and loading the PyTorch model afterwards.
- A path or url to a model folder containing a *flax checkpoint file* in *.msgpack* format (e.g,
``./flax_model/`` containing ``flax_model.msgpack``). In this case, ``from_flax`` should be set to
``True``.
- ``None`` if you are both providing the configuration and state dictionary (resp. with keyword
arguments ``config`` and ``state_dict``).
backbone_model (:obj:`torch.nn.Module`): The backbone model to be modified.
model_args (sequence of positional arguments, *optional*):
All remaining positional arguments will be passed to the underlying model's ``__init__`` method.
config (Union[:obj:`BaseDeltaConfig`, :obj:`str`, :obj:`os.PathLike`], *optional*): Can be either:
- an instance of a class derived from :class:`~PretrainedConfig`,
- a string or path valid as input to :py:meth:`~PretrainedConfig.from_pretrained`.

Configuration for the model to use instead of an automatically loaded configuration. Configuration can
be automatically loaded when:

- The model is a model provided by the library (loaded with the *model id* string of a pretrained
model).
- The model was saved using :py:meth:`~PreTrainedModel.save_pretrained` and is reloaded by supplying the
save directory.
- The model is loaded by supplying a local directory as ``pretrained_model_name_or_path`` and a
configuration JSON file named *config.json* is found in the directory.
state_dict (Dict[:obj:`str`, :obj:`torch.Tensor`], *optional*):
A state dictionary to use instead of a state dictionary loaded from saved weights file.
This option can be used if you want to create a model from a pretrained configuration but load your own
weights. In this case though, you should check if using :py:meth:`~PreTrainedModel.save_pretrained` and
:py:meth:`~PreTrainedModel.from_pretrained` is not a simpler option.
cache_dir (:obj:`Union[str, os.PathLike]`, *optional*):
Path to a directory in which a downloaded pretrained model configuration should be cached if the
standard cache should not be used.
force_download (:obj:`bool`, *optional*, defaults to :obj:`False`):
Whether or not to force the (re-)download of the model weights and configuration files, overriding the
cached versions if they exist.
resume_download (:obj:`bool`, *optional*, defaults to :obj:`False`):
Whether or not to delete incompletely received files. Will attempt to resume the download if such a
file exists.
proxies (:obj:`Dict[str, str]`, *optional*):
A dictionary of proxy servers to use by protocol or endpoint, e.g., `{'http': 'foo.bar:3128',
'http://hostname': 'foo.bar:4012'}`. The proxies are used on each request.
local_files_only(:obj:`bool`, *optional*, defaults to :obj:`False`):
Whether or not to only look at local files (i.e., do not try to download the model).
use_auth_token (:obj:`str` or *bool*, *optional*):
The token to use as HTTP bearer authorization for remote files. If :obj:`True`, will use the token generated
when running ``transformers-cli login`` (stored in ``~/.huggingface``).
revision(:obj:`str`, *optional*, defaults to ``"main"``):
The specific model version to use. It can be a branch name, a tag name, or a commit id, since we use a
git-based system for storing models and other artifacts on huggingface.co, so ``revision`` can be any
identifier allowed by git.
mirror(:obj:`str`, *optional*):
Mirror source to accelerate downloads in China. If you are from China and have an accessibility
problem, you can set this option to resolve it. Note that we do not guarantee the timeliness or safety.
Please refer to the mirror site for more information.
torch_dtype (:obj:`str` or :obj:`torch.dtype`, *optional*):
Override the default :obj:`torch.dtype` and load the model under this dtype. If ``"auto"`` is passed the dtype
will be automatically derived from the model's weights.

.. warning::

This feature is inherited from HuggingFace. We do not guarantee its usefulness currently.
One should only disable *_fast_init* to ensure backwards compatibility with `transformers.__version__ <
4.6.0` for seeded model initialization. This argument will be removed at the next major version. See
`pull request 11471 <https://github.com/huggingface/transformers/pull/11471>`_ for more information.
kwargs (remaining dictionary of keyword arguments, *optional*):
Can be used to update the configuration object (after it being loaded) and initiate the model (e.g.,
``output_attentions=True``). Behaves differently depending on whether a ``config`` is provided or
automatically loaded:

- If a configuration is provided with ``config``, ``**kwargs`` will be directly passed to the
underlying model's ``__init__`` method (we assume all relevant updates to the configuration have
already been done)
- If a configuration is not provided, ``kwargs`` will be first passed to the configuration class
initialization function (:py:meth:`~PretrainedConfig.from_pretrained`). Each key of ``kwargs`` that
corresponds to a configuration attribute will be used to override said attribute with the
supplied ``kwargs`` value. Remaining keys that do not correspond to any configuration attribute
will be passed to the underlying model's ``__init__`` function.

.. tip::
Passing ``use_auth_token=True`` is required when you want to use a private model.

.. code-block:: python

from transformers import AutoModelForSeq2SeqLM
t5 = AutoModelForSeq2SeqLM.from_pretrained("t5-base")
from opendelta import AutoDeltaModel
delta = AutoDeltaModel.from_finetuned("DeltaHub/lora_t5-base_mrpc", backbone_model=t5)
delta.log()

finetuned_delta_path: (optional) path to the directory where the model and its configuration file will be saved.
If not specified, the model will be loaded from the cache directory (see ``cache_dir``).
backbone_model: the backbone model that will be used to instantiate the finetuned delta model.
delta_config: (optional) the configuration file of the finetuned delta model. If not specified, the configuration file
is loaded from the directory ``finetuned_delta_path``.
cache_dir: (optional) path to the directory where the model and its configuration file will be saved.
If not specified, we will first look into the current working directory, then the cache directory of your system, e.g., ~/.cache/delta_center/.
state_dict: (optional) a dictionary containing the model's state_dict. If not specified, the
state_dict is loaded from ``finetuned_delta_path``.
force_download: (optional) if ``True``, the model will be downloaded from the internet even if it is already
present in the cache directory.
check_hash: (optional) if ``True``, check whether the hash of the backbone model recorded when the delta was trained differs from the hash of the backbone model you load now.
local_files_only: (optional) if ``True``, the model will be loaded from the local cache directory only.
"""
# config = kwargs.pop("config", None)
state_dict = kwargs.pop("state_dict", None)
# cache_dir = kwargs.pop("cache_dir", None)

# ignore_mismatched_sizes = kwargs.pop("ignore_mismatched_sizes", False)
force_download = kwargs.pop("force_download", False)
resume_download = kwargs.pop("resume_download", False)
proxies = kwargs.pop("proxies", None)
# output_loading_info = kwargs.pop("output_loading_info", False)
local_files_only = kwargs.pop("local_files_only", False)
use_auth_token = kwargs.pop("use_auth_token", None)
revision = kwargs.pop("revision", None)
mirror = kwargs.pop("mirror", None)
from_pipeline = kwargs.pop("_from_pipeline", None)
from_auto_class = kwargs.pop("_from_auto", False)
# _fast_init = kwargs.pop("_fast_init", True)
torch_dtype = kwargs.pop("torch_dtype", None)
# low_cpu_mem_usage = kwargs.pop("low_cpu_mem_usage", False)

user_agent = {"file_type": "model", "framework": "pytorch", "from_auto_class": from_auto_class}
if is_offline_mode() and not local_files_only:
logger.info("Offline mode: forcing local_files_only=True")
if os.environ.get("DELTACENTER_OFFLINE", '0') == '1':
logger.info("Delta Center offline mode!")
local_files_only = True
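The offline switch above is driven purely by the `DELTACENTER_OFFLINE` environment variable (HuggingFace's own offline mode is handled separately), so a local-only load can be forced before calling `from_finetuned`:

```python
import os

# Skip any DeltaCenter network access and only look at local files.
os.environ["DELTACENTER_OFFLINE"] = "1"

# HF offline mode is controlled by its own variables, e.g.:
os.environ["TRANSFORMERS_OFFLINE"] = "1"
os.environ["HF_DATASETS_OFFLINE"] = "1"
```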
# Load config if we don't provide a configuration

finetuned_delta_path = str(finetuned_delta_path)

if cache_dir is not None:
cached_finetuned_delta_path = os.path.join(cache_dir, finetuned_delta_path)
else:
cached_finetuned_delta_path = finetuned_delta_path

download_from_dc = False
if os.path.isfile(cached_finetuned_delta_path):
raise RuntimeError(
f"You should pass a directory to load a delta checkpoint instead of a file, "
f"since we need the delta's configuration file."
)
elif os.path.isdir(cached_finetuned_delta_path):
if os.path.isfile(os.path.join(cached_finetuned_delta_path, WEIGHTS_NAME)):
# Load from a PyTorch checkpoint
weight_file = os.path.join(cached_finetuned_delta_path, WEIGHTS_NAME)
else:
raise EnvironmentError(
f"Error no file named {WEIGHTS_NAME} found in "
f"directory {cached_finetuned_delta_path}."
)

else:
# try to download from DeltaCenter
from .delta_center import download as dcdownload
cached_finetuned_delta_path = dcdownload(finetuned_delta_path, cache_dir=cache_dir, force_download=force_download)
download_from_dc = True
weight_file = os.path.join(cached_finetuned_delta_path, WEIGHTS_NAME)
if state_dict is None:
state_dict = torch.load(weight_file, map_location="cpu")

if not isinstance(delta_config, BaseDeltaConfig):
# config_path = delta_config if delta_config is not None else finetuned_model_path # Todo check
delta_config, model_kwargs = cls.config_class.from_finetuned(
finetuned_model_path,
cache_dir=cache_dir,
cached_finetuned_delta_path,
cache_dir=None,
return_unused_kwargs=True,
force_download=force_download,
resume_download=resume_download,
proxies=proxies,
local_files_only=local_files_only,
use_auth_token=use_auth_token,
revision=revision,
_from_auto=from_auto_class,
_from_pipeline=from_pipeline,
local_files_only=True if download_from_dc else local_files_only, # has been downloaded
**kwargs,
)

else:
model_kwargs = kwargs
print("delta_config", delta_config)
|
||||
# Load model
|
||||
if finetuned_model_path is not None:
|
||||
finetuned_model_path = str(finetuned_model_path)
|
||||
if os.path.isdir(finetuned_model_path):
|
||||
if os.path.isfile(os.path.join(finetuned_model_path, WEIGHTS_NAME)):
|
||||
# Load from a PyTorch checkpoint
|
||||
archive_file = os.path.join(finetuned_model_path, WEIGHTS_NAME)
|
||||
else:
|
||||
raise EnvironmentError(
|
||||
f"Error no file named {WEIGHTS_NAME} found in "
|
||||
f"directory {finetuned_model_path}."
|
||||
)
|
||||
elif os.path.isfile(finetuned_model_path) or is_remote_url(finetuned_model_path):
|
||||
archive_file = finetuned_model_path
|
||||
else:
|
||||
archive_file = hf_bucket_url(
|
||||
finetuned_model_path,
|
||||
filename=WEIGHTS_NAME,
|
||||
revision=revision,
|
||||
mirror=mirror,
|
||||
)
|
||||
|
||||
try:
|
||||
# Load from URL or cache if already cached #TODO
|
||||
|
||||
resolved_archive_file = cached_path(
|
||||
archive_file,
|
||||
cache_dir=cache_dir,
|
||||
force_download=force_download,
|
||||
proxies=proxies,
|
||||
resume_download=resume_download,
|
||||
local_files_only=local_files_only,
|
||||
use_auth_token=use_auth_token,
|
||||
user_agent=user_agent,
|
||||
)
|
||||
except EnvironmentError as err:
|
||||
logger.error(err)
|
||||
msg = (
|
||||
f"Can't load weights for '{finetuned_model_path}'. Make sure that:\n\n"
|
||||
)
|
||||
|
||||
if revision is not None:
|
||||
msg += f"- or '{revision}' is a valid git identifier (branch name, a tag name, or a commit id) that exists for this model name as listed on its model page on 'https://huggingface.co/models'\n\n"
|
||||
|
||||
raise EnvironmentError(msg)
|
||||
|
||||
if resolved_archive_file == archive_file:
|
||||
logger.info(f"loading weights file {archive_file}")
|
||||
else:
|
||||
logger.info(f"loading weights file {archive_file} from cache at {resolved_archive_file}")
|
||||
else:
|
||||
resolved_archive_file = None
|
||||
|
||||
# load pt weights early so that we know which dtype to init the model under
|
||||
|
||||
if state_dict is None:
|
||||
try:
|
||||
state_dict = torch.load(resolved_archive_file, map_location="cpu")
|
||||
except Exception as e:
|
||||
try:
|
||||
with open(resolved_archive_file) as f:
|
||||
if f.read().startswith("version"):
|
||||
raise OSError(
|
||||
"You seem to have cloned a repository without having git-lfs installed. Please install "
|
||||
"git-lfs and run `git lfs install` followed by `git lfs pull` in the folder "
|
||||
"you cloned."
|
||||
)
|
||||
else:
|
||||
raise ValueError from e
|
||||
except (UnicodeDecodeError, ValueError):
|
||||
raise OSError(
|
||||
f"Unable to load weights from pytorch checkpoint file for '{finetuned_model_path}' "
|
||||
f"at '{resolved_archive_file}'. "
|
||||
"If you tried to load a PyTorch model from a TF 2.0 checkpoint, please set from_tf=True."
|
||||
)
|
||||
|
||||
# set dtype to instantiate the model under:
|
||||
# 1. If torch_dtype is not None, we use that dtype
|
||||
# 2. If torch_dtype is "auto", we auto-detect dtype from the loaded state_dict, by checking its first
|
||||
# weights entry - we assume all weights are of the same dtype
|
||||
# we also may have config.torch_dtype available, but we won't rely on it till v5
|
||||
dtype_orig = None
|
||||
if torch_dtype is not None:
|
||||
if isinstance(torch_dtype, str):
|
||||
if torch_dtype == "auto":
|
||||
torch_dtype = next(iter(state_dict.values())).dtype
|
||||
else:
|
||||
raise ValueError(
|
||||
f"`torch_dtype` can be either a `torch.dtype` or `auto`, but received {torch_dtype}"
|
||||
)
|
||||
dtype_orig = cls._set_default_torch_dtype(torch_dtype)
|
||||
|
||||
|
||||
# Initialize the model from config and attach the delta model to the backbone_model.
delta_model = cls.from_config(config, backbone_model, *model_args, **model_kwargs, )
delta_model = cls.from_config(delta_config, backbone_model, *model_args, **model_kwargs, )

# load the state_dict into the backbone_model. As the delta model's parameter
# is the same object as the deltas in the backbone model with different reference name,
@ -471,14 +313,21 @@ class SaveLoadMixin(PushToHubMixin):
delta_model._load_state_dict_into_backbone(backbone_model, state_dict)

backbone_hash = gen_model_hash(backbone_model)
if check_hash and hasattr(config, "backbone_hash") and \
config.backbone_hash is not None and \
config.backbone_hash != backbone_hash:
logger.warning("The config has a hash of the backbone model that is "
"different from the hash of the loaded model. This indicates a mismatch "
"between the backbone model that the delta checkpoint is based on and "
"the one you loaded. You probably need to train the model instead of "
"directly running inference.")

if check_hash:
if hasattr(delta_config, "backbone_hash") and \
delta_config.backbone_hash is not None and \
delta_config.backbone_hash != backbone_hash:
logger.warning("The config has a hash of the backbone model that is "
"different from the hash of the loaded model. This indicates a mismatch "
"between the backbone model that the delta checkpoint is based on and "
"the one you loaded. You probably need to train the model instead of "
"directly running inference.")
else:
logger.info("Hash-check passed. You can safely use this checkpoint directly.")
else:
logger.warning("Parameters' hash has not been checked!")
# Set model in evaluation mode to deactivate DropOut modules by default
backbone_model.eval()
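Putting the pieces of this method together, a hedged end-to-end sketch: the checkpoint directory is a placeholder, and `check_hash=True` simply enables the backbone-hash comparison shown above.

```python
from transformers import AutoModelForSeq2SeqLM
from opendelta import AutoDeltaModel

backbone = AutoModelForSeq2SeqLM.from_pretrained("t5-base")

# Load a delta checkpoint saved earlier with save_finetuned(). A local directory
# is used directly; anything else is treated as a DeltaCenter id and downloaded.
delta = AutoDeltaModel.from_finetuned(
    "./delta_checkpoints/adapter_t5-base_superglue-cb",  # placeholder path
    backbone_model=backbone,
    check_hash=True,
)
delta.log()  # inspect which modules were modified; from_finetuned already calls backbone.eval()
```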
@ -521,20 +370,30 @@ class SaveLoadMixin(PushToHubMixin):

# if eventually the name is not set, create a default one
if mdict['name'] is None or mdict['name'] == '':
print("Warning: name is not set, use default name")
logger.info("Name is not set, use default name.")
mdict['name'] = self.create_default_name(**mdict)

if len(mdict['usage']) == 0:
logger.info("Usage is not set, use default usage.")
mdict['usage'] = self.create_default_usage(mdict['name'])

center_args = DeltaCenterArguments(**mdict)
return center_args

def create_default_usage(self, name):
usage_str = """from opendelta import AutoDeltaModel\n""" + \
"""delta_model = AutoDeltaModel.from_finetuned('{name_with_userid}', backbone_model=model)\n""" + \
"""delta_model.freeze_module() # if you are going to further train it \n""" + \
"""delta_model.log()"""
return usage_str

def create_default_name(self, **kwargs):
r"""Currently, it's only a simple concatenation of the arguments.
"""
print("key args", kwargs)

reponame = ""
reponame += kwargs["model_path_public"].split("/")[-1]+"_" if kwargs['model_path_public'] is not None else kwargs['backbone_model']
reponame += kwargs["backbone_model_path_public"].split("/")[-1]+"_" if kwargs['backbone_model_path_public'] is not None else kwargs['backbone_model']
reponame += kwargs["delta_type"]+"_" if kwargs["delta_type"] is not None else ""

# tasks
@ -47,7 +47,7 @@ t5_mapping = {
}
}
},
"final_layer_norm": {"__name__":"layer_norm"},
"final_layer_norm": {"__name__":"layer_norm"},
},
"decoder": {"__name__":"decoder",
"embed_tokens": {"__name__":"embeddings"},
@ -222,8 +222,14 @@ distilbert_mapping = {
}
}

MAPPINGERROR_MSG = "We haven't provided a common structure mapping for this backbone model." + \
" If it is a common enough PLM, please check whether it is wrapped by another wrapper model, e.g., XXXForSequenceClassification." + \
" Please manually add the " + \
"delta models by specifying 'modified_modules' based on the visualization of the model structure. Refer to `https://opendelta.readthedocs.io/en/latest/notes/faq.html` for details."
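When this error message fires, the fallback it suggests looks roughly like the sketch below. The model id and module names are illustrative only; use opendelta's Visualization to find the real submodule names of your backbone.

```python
from transformers import AutoModel
from opendelta import Visualization, LoraModel

model = AutoModel.from_pretrained("some/unmapped-backbone")  # hypothetical model id
Visualization(model).structure_graph()  # inspect the module tree first

# Then name the target modules explicitly instead of relying on the common mapping.
delta = LoraModel(backbone_model=model, modified_modules=["query", "key"])  # illustrative names
```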
def transform(org_key, mapping, strict=True, warning=False, verbose=False):

chain = org_key.split(".")
query = ""
node = mapping
@ -238,7 +244,7 @@ def transform(org_key, mapping, strict=True, warning=False, verbose=False):
if strict:
if warning:
print(f"'{org_key}' has no common mapping.")
return
return
else:
new_chain.append(query)
else:
@ -249,19 +255,19 @@ def transform(org_key, mapping, strict=True, warning=False, verbose=False):
new_chain.append(query)
query = ""
else:
query += "."
query += "."
if query!="":
if strict:
if warning:
print("A part of the original key hasn't been matched!")
return
return
else:
new_chain.append(query.strip(".")) # trailing query
new_key = ".".join(new_chain)
if verbose:
print(f"{org_key} => {new_key}")
return new_key
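A quick way to see what `transform` does is to feed it real parameter names and print the rewritten keys, mirroring the module's own `__main__` check shown further below. `transform` and `t5_mapping` are assumed to be in scope (same module); no specific outputs are asserted here since they depend on the mapping.

```python
from transformers import T5ForConditionalGeneration

plm = T5ForConditionalGeneration.from_pretrained("t5-base")
for name, _ in plm.named_modules():
    # verbose=True prints "original_key => mapped_key" for every matched name.
    new_key = transform(name, t5_mapping, strict=True, warning=False, verbose=True)
    if new_key is None:  # strict mode: names outside the mapping are dropped
        continue
```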
@ -278,7 +284,7 @@ def mapping_for_SequenceClassification(mapping, type):
mapping["classifier"] = {"__name__": "classifier"}
elif type == "deberta":
mapping.pop("lm_predictions.lm_head")
mapping["pooler"] = {"__name__": "classifier"}
mapping["pooler"] = {"__name__": "classifier"}
mapping["classifier"] = {"__name__": "classifier"}
else:
raise NotImplementedError

@ -289,7 +295,7 @@ def mapping_for_ConditionalGeneration(mapping, type):
if type == "t5":
mapping["lm_head"] = {"__name__":"lm_head.proj"}
else:
raise NotImplementedError
raise NotImplementedError(MAPPINGERROR_MSG.format())
return mapping

def mapping_for_CausalLM(mapping, type):
@ -304,22 +310,23 @@ class _LazyLoading(OrderedDict):
def __init__(self, mapping):
self._mapping_string = mapping
self._mapping = {}

def __getitem__(self, key):
if key not in self._mapping_string:
raise KeyError(key)
raise KeyError(MAPPINGERROR_MSG)
value = self._mapping_string[key]
self._mapping[key] = eval(value)
return self._mapping[key]

return self._mapping[key]

def keys(self):
return list(self._mapping_string.keys())

def __contains__(self, item):
return item in self._mapping_string

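The class above defers `eval` of each mapping expression until the key is first read. A self-contained toy version of that behaviour, independent of the real mappings, to make the idea concrete:

```python
from collections import OrderedDict

class LazyDemo(OrderedDict):
    """Toy re-implementation: values are stored as source strings and eval'd on first access."""
    def __init__(self, mapping_strings):
        super().__init__()
        self._mapping_string = mapping_strings
        self._mapping = {}

    def __getitem__(self, key):
        if key not in self._mapping_string:
            raise KeyError(key)
        if key not in self._mapping:  # build (and cache) the value lazily
            self._mapping[key] = eval(self._mapping_string[key])
        return self._mapping[key]

demo = LazyDemo({"square_of_3": "3 ** 2"})
print(demo["square_of_3"])  # 9, computed only when first requested
```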
class CommonStructureMap(object):
r""" A lazy loading structure map.
"""
@ -327,7 +334,6 @@ class CommonStructureMap(object):
"RobertaForSequenceClassification": """mapping_for_SequenceClassification(roberta_mapping, "roberta")""",
"RobertaForMaskedLM": "roberta_mapping",
"BertForMaskedLM": "bert_mapping",
"BertForSequenceClassification": """mapping_for_SequenceClassification(bert_mapping, "bert")""",
"T5ForConditionalGeneration": """mapping_for_ConditionalGeneration(t5_mapping, "t5")""",
"DebertaV2ForSequenceClassification": """mapping_for_SequenceClassification(debertav2_mapping, "deberta")""",
"CLIPModel":"""""",
@ -348,8 +354,17 @@ class CommonStructureMap(object):
"""
backbone_class = type(backbone_model).__name__
if backbone_class not in cls.Mappings:
raise KeyError(backbone_class)
mapping = cls.Mappings[backbone_class]
raise KeyError(MAPPINGERROR_MSG)

try:
mapping = cls.Mappings[backbone_class]
except KeyError:
logger.error(MAPPINGERROR_MSG)
exit(-1)

if visualize:
logger.info("Since you are using the common structure mapping, draw the transformed parameter structure for checking.")
vis = Visualization(backbone_model)
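In use, the map is typically obtained from the backbone instance itself. A hedged sketch follows; the `load` classmethod name and signature are inferred from the hunk above, and the import path is assumed.

```python
from transformers import AutoModelForSequenceClassification
from opendelta.utils.structure_mapping import CommonStructureMap  # import path assumed

model = AutoModelForSequenceClassification.from_pretrained("bert-base-uncased")
# Looks up the mapping registered for BertForSequenceClassification; with
# visualize=True it also draws the transformed parameter structure for checking.
structure_map = CommonStructureMap.load(model, visualize=True)
```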
@ -379,4 +394,3 @@ if __name__ == "__main__":

for name, _ in plm.named_modules():
transform(name, t5_mapping, strict=True, warning=False)
@ -9,4 +9,5 @@ rich
web.py
gitpython
scipy
sklearn
sklearn
delta_center_client==0.0.4
setup.py
@ -3,24 +3,34 @@ import setuptools
import os
import os

def get_requirements(path):
print("path is :", path)
ret = []

with open(os.path.join(path, "requirements.txt"), encoding="utf-8") as freq:
for line in freq.readlines():
ret.append( line.strip() )
requires = """torch>=1.8.0
transformers>=4.10.0
datasets==1.17.0
sentencepiece>=0.1.96
tqdm>=4.62.2
# loralib
decorator
rich
web.py
gitpython
delta_center_client==0.0.4
"""

def get_requirements():
ret = [x for x in requires.split("\n") if len(x)>0]
print("requirements:", ret)
return ret

path = os.path.dirname(os.path.abspath(__file__))
requires = get_requirements(path)
print(requires)

# path = os.path.dirname(os.path.abspath(__file__))
# requires = get_requirements(path)

with open('README.md', 'r') as f:
setuptools.setup(
name = 'opendelta',
version = "0.1.0",
version = "0.2.4",
description = "An open source framework for delta learning (parameter efficient learning).",
long_description=open("README.md", "r", encoding="utf-8").read(),
long_description_content_type="text/markdown",
@ -30,10 +40,10 @@ with open('README.md', 'r') as f:
url="https://github.com/thunlp/OpenDelta",
keywords = ['PLM', 'Parameter-efficient-Learning', 'AI', 'NLP'],
python_requires=">=3.6.0",
install_requires=requires,
install_requires=get_requirements(),
package_dir={'opendelta':'opendelta'},
package_data= {
'opendelta':["utils/interactive/templates/*.html"],
'opendelta':["utils/interactive/templates/*.html", 'requirements.txt'],
},
include_package_data=True,
packages=setuptools.find_packages(),