This commit is contained in:
shengdinghu 2022-03-20 03:09:00 +08:00
parent 266a00e390
commit 097d122aee
6 changed files with 93 additions and 21 deletions

3
.gitignore vendored
View File

@ -20,3 +20,6 @@ log.txt
*beans/
**/examples/*/configs/
!examples/*/configs/config_gen.py
**/jupyter_notebook_examples/
!examples/jupyter_notebook_examples/*.py

View File

@ -46,6 +46,50 @@ BaseConfigs['t5-base'] = {
"save_strategy": "steps"
}
# Base configuration for the 't5-xxl' backbone.
# The single tuple-keyed entry below packs the per-task settings: the key names
# eleven fields, and the value zips eleven parallel lists (fifteen entries each,
# one per task) so that position i across all lists describes task i.
# NOTE(review): zip(...) is a lazy one-shot iterator — presumably it is expanded
# exactly once by the config generator; confirm against the consumer.
BaseConfigs['t5-xxl'] = {
("job_name", "task_name", "eval_dataset_name", "test_dataset_name", "num_train_epochs",
"max_source_length",
"per_device_train_batch_size", "per_device_eval_batch_size", "warmup_steps","save_steps", "eval_steps"): zip(
# job_name
["superglue-boolq", "superglue-cb", "superglue-copa", "superglue-wic", "superglue-multirc", "superglue-record",
"superglue-wsc.fixed", "mrpc", "cola", "sst2", "qnli", "rte", "mnli", "qqp", "stsb"],
# task_name, eval_dataset_name, test_dataset_name (same task list repeated)
["superglue-boolq", "superglue-cb", "superglue-copa", "superglue-wic", "superglue-multirc", "superglue-record", "superglue-wsc.fixed", "mrpc", "cola", "sst2", "qnli", "rte", "mnli", "qqp", "stsb"],
["superglue-boolq", "superglue-cb", "superglue-copa", "superglue-wic", "superglue-multirc", "superglue-record", "superglue-wsc.fixed", "mrpc", "cola", "sst2", "qnli", "rte", "mnli", "qqp", "stsb"],
["superglue-boolq", "superglue-cb", "superglue-copa", "superglue-wic", "superglue-multirc", "superglue-record", "superglue-wsc.fixed", "mrpc", "cola", "sst2", "qnli", "rte", "mnli", "qqp", "stsb"],
# num_train_epochs
[ 20, 20, 40, 20, 3, 3, 20, 20, 20, 3, 3, 20, 3, 3, 20],
# max_source_length
[256, 256, 256, 256, 256, 512, 256, 128, 128, 128, 128, 128, 128, 128, 128],
# per_device_train_batch_size: first seven tasks listed explicitly, remaining eight use 4
[ 32, 32, 32, 32, 32, 16, 32] + [4] * 8,
# per_device_eval_batch_size: same values as the training batch sizes
[ 32, 32, 32, 32, 32, 16, 32] + [4] * 8,
# warmup_steps: zero for all fifteen tasks
[0] *7 +[0] *8,
# save_steps
[200, 100, 50, 100, 200, 200, 100, 200, 100, 200, 200, 100, 200, 200, 100],
# eval_steps: identical to save_steps
[200, 100, 50, 100, 200, 200, 100, 200, 100, 200, 200, 100, 200, 200, 100],
),
"do_train": True,
"do_eval": True,
"do_test": True,
# NOTE(review): absolute path under one user's home directory — will not resolve
# on other machines; confirm whether this is meant to be overridden.
"model_name_or_path": "/home/hushengding/plm_cache/t5-xxl-lm-adapt/",
"tokenizer_name": "/home/hushengding/plm_cache/t5-xxl-lm-adapt/",
"save_total_limit": 1,
# For glue datasets.
"split_validation_test": True,
"seed": 42,
"dataset_config_name": ["en"],
"eval_dataset_config_name": ["en"],
"test_dataset_config_name": ["en"],
# other configurations.
"predict_with_generate": True,
# To evaluate during training.
"load_best_model_at_end": True,
"metric_for_best_model": "average_metrics",
"greater_is_better": True,
"evaluation_strategy": "steps",
"overwrite_output_dir": True,
"push_to_hub": True,
"save_strategy": "steps",
"model_parallel": True
}
AllConfigs['bitfit_t5-base'] = copy.deepcopy(BaseConfigs['t5-base'])
AllConfigs['bitfit_t5-base'].update({
"delta_type": "bitfit",
@ -163,6 +207,21 @@ AllConfigs['low_rank_adapter_t5-base'].update({
"low_rank_rank": 1,
})
# Low-rank adapter on the t5-xxl backbone: start from an independent deep copy
# of the t5-xxl base configuration and layer the delta-specific settings on top.
AllConfigs['low_rank_adapter_t5-xxl'] = {
    **copy.deepcopy(BaseConfigs['t5-xxl']),
    "delta_type": "low_rank_adapter",
    "learning_rate": 3e-4,
    # Modules listed here are left trainable.
    "unfrozen_modules": ["deltas", "layer_norm", "final_layer_norm"],
    "output_dir": "outputs/low_rank_adapter/t5-xxl/",
    "non_linearity": "gelu_new",
    "low_rank_w_init": "glorot-uniform",
    "low_rank_rank": 1,
}
AllConfigs['soft_prompt_t5-base'] = copy.deepcopy(BaseConfigs['t5-base'])
AllConfigs['soft_prompt_t5-base'].update({

View File

@ -632,7 +632,7 @@ class DeltaBase(nn.Module, SaveLoadMixin):
self.config = config
def log(self, module=None, delta_ratio=True, trainable_ratio=True, visualization=True):
def log(self, module=None, delta_ratio=True, trainable_ratio=True, visualization=True, cuda_memory=True):
r"""Log and visualize the result of applying delta.
Possible Options are ``trainable_ratio``,
``visualization``, ``delta_ratio``.
@ -658,6 +658,15 @@ class DeltaBase(nn.Module, SaveLoadMixin):
n_delta = self.num_delta_parameters(module)
n_total = self.num_total_parameters(module)
logger.info("Delta Parameter Ratio: {:2f}%".format(n_delta/n_total*100))
if cuda_memory:
cudamem = 0
maxcudamem = 0
for device_id in range(torch.cuda.device_count()):
cudamem += torch.cuda.memory_allocated(f"cuda:{device_id}")/1024**3
maxcudamem += torch.cuda.max_memory_allocated(f"cuda:{device_id}")/1024**3
logger.info("Static Memory {:.2f} GB, Max Memory {:.2f} GB".format(cudamem, maxcudamem))
def num_delta_parameters(self, module: Optional[nn.Module]=None):
r"""[NODOC] A small sugar function to get the number of trainable parameter in the backbone model. Often used to

View File

@ -113,18 +113,6 @@ def get_log_levels_dict():
return log_levels
def get_logger(name: Optional[str] = None) -> logging.Logger:
    """
    Fetch the logger registered under ``name``.

    When ``name`` is ``None`` the library name from ``_get_library_name()`` is
    used instead. The library root logger is configured before the logger is
    looked up.

    Intended for authors of custom modules rather than for direct end-user use.
    """
    logger_name = _get_library_name() if name is None else name
    _configure_library_root_logger()
    return logging.getLogger(logger_name)
def get_verbosity() -> int:
"""
@ -275,4 +263,17 @@ def warning_advice(self, *args, **kwargs):
logging.Logger.warning_advice = warning_advice
set_verbosity_debug()
def get_logger(name: Optional[str] = None, verbosity='info') -> logging.Logger:
    """
    Return the logger registered under ``name`` with its level set from ``verbosity``.

    This function is not supposed to be directly accessed unless you are writing a custom module.

    Args:
        name: Logger name. When ``None``, the value of ``_get_library_name()`` is used.
        verbosity: Either a key of the module-level ``log_levels`` mapping
            (default ``'info'``) or an integer logging level, which is applied as-is.

    Returns:
        The ``logging.Logger`` for ``name``.

    Raises:
        KeyError: If ``verbosity`` is not an integer and is not a key of ``log_levels``.

    Note:
        The level is applied on every call, so the most recent call for a given
        ``name`` determines that logger's level.
    """
    if name is None:
        name = _get_library_name()
    _configure_library_root_logger()
    logger = logging.getLogger(name)

    if isinstance(verbosity, int):
        # Numeric levels (e.g. logging.DEBUG) are passed straight through.
        level = verbosity
    else:
        try:
            level = log_levels[verbosity]
        except KeyError:
            # Same exception type as a plain lookup, but say what was expected.
            raise KeyError(
                f"Unknown verbosity {verbosity!r}; expected one of {list(log_levels)} "
                "or an integer logging level."
            ) from None

    logger.setLevel(level)
    return logger

View File

@ -1,8 +1,8 @@
torch>=1.9.0
transformers==4.10.0
datasets==1.17.0
sentencepiece==0.1.96
tqdm==4.62.2
torch>=1.8.0
transformers>=4.10.0
datasets>=1.17.0
sentencepiece>=0.1.96
tqdm>=4.62.2
loralib
decorator
rich

View File

@ -17,7 +17,7 @@ print(requires)
with open('README.md', 'r') as f:
setuptools.setup(
name = 'opendelta',
version = '0.0.1',
version = '0.0.2',
description = "An open source framework for delta learning (parameter efficient learning).",
long_description=open("README.md", "r", encoding="utf-8").read(),
long_description_content_type="text/markdown",
@ -26,7 +26,7 @@ with open('README.md', 'r') as f:
license="Apache",
url="https://github.com/thunlp/OpenDelta",
keywords = ['PLM', 'Parameter-efficient-Learning', 'AI', 'NLP'],
python_requires=">=3.8.0",
python_requires=">=3.6.0",
install_requires=requires,
packages=setuptools.find_packages(),
classifiers=[