CPM-9G-2B/FM_9G/apps/fm9g_2b/model_configs/2.4b.json

29 lines
597 B
JSON

{
  "vocab_size": 122753,
  "dropout_p": 0.0,
  "eps": 1e-05,
  "half": true,
  "half_type": "bf16",
  "use_flash_attn": true,
  "flash_attn_mask_shape": "2d",
  "dim_model": 2304,
  "dim_ff": 5760,
  "dim_head": 64,
  "num_heads": 36,
  "num_kv_heads": 36,
  "num_layers": 40,
  "activate_fn": "silu",
  "init_std": 0.10,
  "scale": true,
  "scale_emb": 12,
  "scale_depth": 1.4,
  "dim_model_base": 256,
  "model_type": "fm9g",
  "architectures": [
    "FM9GForCausalLM"
  ],
  "qk_norm": false,
  "tie_lm_head": true,
  "ffn_gated": true
}