{
  "vocab_size": 119696,
  "dropout_p": 0.0,
  "eps": 1e-05,
  "half": true,
  "half_type": "bf16",
  "use_flash_attn": true,
  "flash_attn_mask_shape": "2d",
  "dim_model": 4096,
  "dim_ff": 14336,
  "dim_head": 128,
  "num_heads": 32,
  "num_kv_heads": 32,
  "num_layers": 32,
  "activate_fn": "silu",
  "init_std": 0.10,
  "scale": false,
  "scale_emb": 12,
  "scale_depth": -1,
  "model_type": "fm9g",
  "architectures": [
    "FM9GForCausalLM"
  ],
  "qk_norm": false,
  "tie_lm_head": false,
  "ffn_gated": true
}