{ "card": 2048, "n_q": 32, "dep_q": 0, "delays": [ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ], "dim": 2048, "text_card": 8000, "existing_text_padding_id": 3, "num_heads": 16, "num_layers": 16, "hidden_scale": 4.125, "causal": true, "layer_scale": null, "context": 750, "max_period": 100000.0, "gating": "silu", "norm": "rms_norm_f32", "positional_embedding": "rope", "depformer_dim": 1024, "depformer_num_heads": 16, "depformer_num_layers": 6, "depformer_dim_feedforward": null, "depformer_multi_linear": true, "depformer_pos_emb": "none", "depformer_weights_per_step": true, "conditioners": {}, "cross_attention": false, "model_id": { "sig": "70f8f0ea", "epoch": 500 }, "lm_gen_config": { "temp": 0.0, "temp_text": 0.0, "top_k": 250, "top_k_text": 50 }, "stt_config": { "audio_delay_seconds": 0.5, "audio_silence_prefix_seconds": 0.0 }, "model_type": "stt", "mimi_name": "mimi-pytorch-e351c8d8@125.safetensors", "tokenizer_name": "tokenizer_en_fr_audio_8000.model" }