CPM-9G-8B/FM_9G/apps/fm9g_8b/model_configs/8b.json

28 lines
573 B
JSON

{
"vocab_size": 119696,
"dropout_p": 0.0,
"eps": 1e-05,
"half": true,
"half_type": "bf16",
"use_flash_attn": true,
"flash_attn_mask_shape": "2d",
"dim_model": 4096,
"dim_ff": 14336,
"dim_head": 128,
"num_heads": 32,
"num_kv_heads": 32,
"num_layers": 32,
"activate_fn": "silu",
"init_std": 0.10,
"scale": false,
"scale_emb": 12,
"scale_depth": -1,
"model_type": "fm9g",
"architectures": [
"FM9GForCausalLM"
],
"qk_norm": false,
"tie_lm_head": false,
"ffn_gated": true
}