0.0.2
parent 266a00e390
commit 097d122aee
@@ -20,3 +20,6 @@ log.txt
*beans/
**/examples/*/configs/
!examples/*/configs/config_gen.py
**/jupyter_notebook_examples/
!examples/jupyter_notebook_examples/*.py
@@ -46,6 +46,50 @@ BaseConfigs['t5-base'] = {
    "save_strategy": "steps"
}

BaseConfigs['t5-xxl'] = {
    ("job_name", "task_name", "eval_dataset_name", "test_dataset_name", "num_train_epochs",
     "max_source_length",
     "per_device_train_batch_size", "per_device_eval_batch_size", "warmup_steps", "save_steps", "eval_steps"): zip(
        ["superglue-boolq", "superglue-cb", "superglue-copa", "superglue-wic", "superglue-multirc", "superglue-record",
         "superglue-wsc.fixed", "mrpc", "cola", "sst2", "qnli", "rte", "mnli", "qqp", "stsb"],
        ["superglue-boolq", "superglue-cb", "superglue-copa", "superglue-wic", "superglue-multirc", "superglue-record", "superglue-wsc.fixed", "mrpc", "cola", "sst2", "qnli", "rte", "mnli", "qqp", "stsb"],
        ["superglue-boolq", "superglue-cb", "superglue-copa", "superglue-wic", "superglue-multirc", "superglue-record", "superglue-wsc.fixed", "mrpc", "cola", "sst2", "qnli", "rte", "mnli", "qqp", "stsb"],
        ["superglue-boolq", "superglue-cb", "superglue-copa", "superglue-wic", "superglue-multirc", "superglue-record", "superglue-wsc.fixed", "mrpc", "cola", "sst2", "qnli", "rte", "mnli", "qqp", "stsb"],
        [ 20,  20,  40,  20,   3,   3,  20,  20,  20,   3,   3,  20,   3,   3,  20],
        [256, 256, 256, 256, 256, 512, 256, 128, 128, 128, 128, 128, 128, 128, 128],
        [ 32,  32,  32,  32,  32,  16,  32] + [4] * 8,
        [ 32,  32,  32,  32,  32,  16,  32] + [4] * 8,
        [0] * 7 + [0] * 8,
        [200, 100,  50, 100, 200, 200, 100, 200, 100, 200, 200, 100, 200, 200, 100],
        [200, 100,  50, 100, 200, 200, 100, 200, 100, 200, 200, 100, 200, 200, 100],
    ),
    "do_train": True,
    "do_eval": True,
    "do_test": True,

    "model_name_or_path": "/home/hushengding/plm_cache/t5-xxl-lm-adapt/",
    "tokenizer_name": "/home/hushengding/plm_cache/t5-xxl-lm-adapt/",
    "save_total_limit": 1,
    # For glue datasets.
    "split_validation_test": True,
    "seed": 42,
    "dataset_config_name": ["en"],
    "eval_dataset_config_name": ["en"],
    "test_dataset_config_name": ["en"],
    # other configurations.
    "predict_with_generate": True,
    # To evaluate during training.
    "load_best_model_at_end": True,
    "metric_for_best_model": "average_metrics",
    "greater_is_better": True,
    "evaluation_strategy": "steps",
    "overwrite_output_dir": True,
    "push_to_hub": True,
    "save_strategy": "steps",
    "model_parallel": True
}

AllConfigs['bitfit_t5-base'] = copy.deepcopy(BaseConfigs['t5-base'])
AllConfigs['bitfit_t5-base'].update({
    "delta_type": "bitfit",
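The BaseConfigs['t5-xxl'] block above keys a tuple of field names to zip(...) over per-task value lists (one column per GLUE/SuperGLUE task). A minimal sketch of how such a block can be expanded into one flat config per task follows; the helper name expand_zipped_config is illustrative only and is not taken from the repository's config_gen.py:

    import copy

    def expand_zipped_config(base_config):
        """Turn {("field_a", "field_b", ...): zip(col_a, col_b, ...), ...} into per-task dicts."""
        # The single tuple-valued key holds the zipped per-task columns.
        zipped_key = next(k for k in base_config if isinstance(k, tuple))
        shared = {k: v for k, v in base_config.items() if k != zipped_key}
        per_task = []
        for row in base_config[zipped_key]:            # one row of values per task
            cfg = copy.deepcopy(shared)
            cfg.update(dict(zip(zipped_key, row)))     # map field names onto this task's values
            per_task.append(cfg)
        return per_task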
@@ -163,6 +207,21 @@ AllConfigs['low_rank_adapter_t5-base'].update({
    "low_rank_rank": 1,
})

AllConfigs['low_rank_adapter_t5-xxl'] = copy.deepcopy(BaseConfigs['t5-xxl'])
AllConfigs['low_rank_adapter_t5-xxl'].update({
    "delta_type": "low_rank_adapter",
    "learning_rate": 3e-4,
    "unfrozen_modules": [
        "deltas",
        "layer_norm",
        "final_layer_norm"
    ],
    "output_dir": "outputs/low_rank_adapter/t5-xxl/",
    "non_linearity": "gelu_new",
    "low_rank_w_init": "glorot-uniform",
    "low_rank_rank": 1,
})

AllConfigs['soft_prompt_t5-base'] = copy.deepcopy(BaseConfigs['t5-base'])
AllConfigs['soft_prompt_t5-base'].update({
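For orientation, a hedged sketch of how such AllConfigs entries might be written out as per-task JSON files under examples/*/configs/ (which the .gitignore change above leaves untracked while keeping config_gen.py); the function below is illustrative, not the repository's actual generator:

    import json
    import os

    def dump_configs(per_task_configs, out_dir="examples/my_example/configs"):
        # `per_task_configs` is assumed to be a list of flat dicts, e.g. the
        # output of the expand_zipped_config() helper sketched earlier.
        os.makedirs(out_dir, exist_ok=True)
        for cfg in per_task_configs:
            path = os.path.join(out_dir, cfg["job_name"] + ".json")
            with open(path, "w") as f:
                json.dump(cfg, f, indent=4)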
@@ -632,7 +632,7 @@ class DeltaBase(nn.Module, SaveLoadMixin):
        self.config = config


    def log(self, module=None, delta_ratio=True, trainable_ratio=True, visualization=True):
    def log(self, module=None, delta_ratio=True, trainable_ratio=True, visualization=True, cuda_memory=True):
        r"""Log and visualize the result of applying delta.
        Possible Options are ``trainable_ratio``,
        ``visualization``, ``delta_ratio``.
@@ -658,6 +658,15 @@ class DeltaBase(nn.Module, SaveLoadMixin):
            n_delta = self.num_delta_parameters(module)
            n_total = self.num_total_parameters(module)
            logger.info("Delta Parameter Ratio: {:2f}%".format(n_delta/n_total*100))
        if cuda_memory:
            cudamem = 0
            maxcudamem = 0
            for device_id in range(torch.cuda.device_count()):
                cudamem += torch.cuda.memory_allocated(f"cuda:{device_id}")/1024**3
                maxcudamem += torch.cuda.max_memory_allocated(f"cuda:{device_id}")/1024**3
            logger.info("Static Memory {:.2f} GB, Max Memory {:.2f} GB".format(cudamem, maxcudamem))


    def num_delta_parameters(self, module: Optional[nn.Module]=None):
        r"""[NODOC] A small sugar function to get the number of trainable parameter in the backbone model. Often used to
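A hedged usage sketch of the new cuda_memory option; the LoraModel class and the freeze_module call reflect OpenDelta's usual workflow and are assumptions, not part of this diff:

    from transformers import AutoModelForSeq2SeqLM
    from opendelta import LoraModel  # any DeltaBase subclass exposes .log()

    backbone = AutoModelForSeq2SeqLM.from_pretrained("t5-base")
    delta_model = LoraModel(backbone_model=backbone)
    delta_model.freeze_module(exclude=["deltas"])

    # With cuda_memory=True (the default added in this commit), log() now also
    # reports allocated and peak CUDA memory summed over all visible devices, in GB.
    delta_model.log(delta_ratio=True, trainable_ratio=True, visualization=True, cuda_memory=True)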
@@ -113,18 +113,6 @@ def get_log_levels_dict():
    return log_levels


def get_logger(name: Optional[str] = None) -> logging.Logger:
    """
    Return a logger with the specified name.
    This function is not supposed to be directly accessed unless you are writing a custom transformers module.
    """

    if name is None:
        name = _get_library_name()

    _configure_library_root_logger()
    return logging.getLogger(name)


def get_verbosity() -> int:
    """
@@ -275,4 +263,17 @@ def warning_advice(self, *args, **kwargs):

logging.Logger.warning_advice = warning_advice

set_verbosity_debug()

def get_logger(name: Optional[str] = None, verbosity='info') -> logging.Logger:
    """
    Return a logger with the specified name.
    This function is not supposed to be directly accessed unless you are writing a custom transformers module.
    """

    if name is None:
        name = _get_library_name()

    _configure_library_root_logger()
    logger = logging.getLogger(name)
    logger.setLevel(log_levels[verbosity])
    return logger
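A brief usage sketch of the reworked get_logger; the opendelta.utils.logging module path is an assumption based on this file's transformers-style layout:

    from opendelta.utils import logging as delta_logging

    # `verbosity` is looked up in log_levels, so the usual string levels apply,
    # e.g. 'debug', 'info', 'warning', 'error'.
    logger = delta_logging.get_logger(__name__, verbosity='debug')
    logger.debug("The per-logger level is now set at creation time.")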
@@ -1,8 +1,8 @@
torch>=1.9.0
transformers==4.10.0
datasets==1.17.0
sentencepiece==0.1.96
tqdm==4.62.2
torch>=1.8.0
transformers>=4.10.0
datasets>=1.17.0
sentencepiece>=0.1.96
tqdm>=4.62.2
loralib
decorator
rich
setup.py
@@ -17,7 +17,7 @@ print(requires)
with open('README.md', 'r') as f:
    setuptools.setup(
        name = 'opendelta',
        version = '0.0.1',
        version = '0.0.2',
        description = "An open source framework for delta learning (parameter efficient learning).",
        long_description=open("README.md", "r", encoding="utf-8").read(),
        long_description_content_type="text/markdown",

@@ -26,7 +26,7 @@ with open('README.md', 'r') as f:
        license="Apache",
        url="https://github.com/thunlp/OpenDelta",
        keywords = ['PLM', 'Parameter-efficient-Learning', 'AI', 'NLP'],
        python_requires=">=3.8.0",
        python_requires=">=3.6.0",
        install_requires=requires,
        packages=setuptools.find_packages(),
        classifiers=[