Text Generation
PyTorch
English
openlm
linear
mistral
Eval Results
mistral-supra / config.json
sedrick-keh-tri
upload model
51801c5
raw
history blame contribute delete
535 Bytes
{
"architectures": [
"OpenLMForCausalLM"
],
"model_type": "openlm",
"dim": 4096,
"intermediate_dim_ffn": 14336,
"n_layers": 32,
"n_heads": 32,
"n_heads_kv": 8,
"vocab_size": 32000,
"norm_eps": 1e-5,
"seq_len": 2048,
"weight_tying": false,
"apply_qk_norm": false,
"qk_head_dim": 128,
"v_head_dim": 128,
"norm_type": "rms_norm",
"attn_name": "linear_attn",
"positional_embedding_type": "rotary",
"ffn_type": "swiglu",
"use_decay": true,
"use_retnet_slopes": false,
"decay_start": null
}