{
  "dim": 4096,
  "text_card": 40960,
  "existing_text_padding_id": 0,
  "n_q": 16,
  "dep_q": 8,
  "card": 2048,
  "num_heads": 32,
  "num_layers": 32,
  "hidden_scale": 4.125,
  "causal": true,
  "layer_scale": null,
  "context": 3000,
  "max_period": 10000,
  "gating": "silu",
  "norm": "rms_norm_f32",
  "positional_embedding": "rope",
  "depformer_dim": 1024,
  "depformer_dim_feedforward": 4224,
  "depformer_num_heads": 16,
  "depformer_num_layers": 6,
  "depformer_layer_scale": null,
  "depformer_multi_linear": true,
  "depformer_context": 8,
  "depformer_max_period": 10000,
  "depformer_gating": "silu",
  "depformer_pos_emb": "none",
  "depformer_weights_per_step": true,
  "delays": [
    0,
    0,
    1,
    1,
    1,
    1,
    1,
    1,
    1,
    0,
    1,
    1,
    1,
    1,
    1,
    1,
    1
  ],
  "existing_text_end_padding_id": 2
}