OpenDeltaMirror/examples/examples_prompt/configs/compacter_blenderbot-3b/sst2.json

66 lines
1.9 KiB
JSON

{
"backbone_model": "blenderbot",
"dataset_config_name": [
"en"
],
"datasets_load_from_disk": true,
"datasets_saved_path": "/mnt/sfs_turbo/hsd/huggingface_datasets/saved_to_disk/",
"delta_type": "compacter",
"do_eval": true,
"do_test": true,
"do_train": true,
"eval_dataset_config_name": [
"en"
],
"eval_dataset_name": "sst2",
"eval_steps": 200,
"evaluation_strategy": "steps",
"factorized_phm": true,
"factorized_phm_rule": false,
"gradient_clip": false,
"greater_is_better": true,
"hypercomplex_adapters": true,
"hypercomplex_division": 4,
"hypercomplex_nonlinearity": "glorot-uniform",
"learn_phm": true,
"learning_rate": 0.003,
"load_best_model_at_end": true,
"max_source_length": 128,
"metric_for_best_model": "average_metrics",
"model_name_or_path": "/mnt/sfs_turbo/hsd/plm_cache/blenderbot-3b",
"model_path_public": "blenderbot-3b",
"non_linearity": "gelu_new",
"normalize_phm_weight": false,
"num_train_epochs": 3,
"output_dir": "outputs/compacter/blenderbot-3b/sst2",
"overwrite_output_dir": true,
"per_device_eval_batch_size": 32,
"per_device_train_batch_size": 32,
"phm_c_init": "normal",
"phm_clamp": false,
"phm_init_range": 0.0001,
"predict_with_generate": true,
"push_to_dc": true,
"push_to_hf": false,
"save_steps": 200,
"save_strategy": "steps",
"save_total_limit": 1,
"seed": 42,
"shared_phm_rule": false,
"split_validation_test": true,
"task_name": "sst2",
"test_dataset_config_name": [
"en"
],
"test_dataset_name": "sst2",
"tokenizer_name": "/mnt/sfs_turbo/hsd/plm_cache/blenderbot-3b",
"unfrozen_modules": [
"deltas",
"layer_norm",
"final_layer_norm"
],
"use_bias_down_sampler": true,
"use_bias_up_sampler": true,
"warmup_steps": 0,
"modified_modules": [
"fc2"
]
}