{
  "version": "0.1",
  "model": {
    "encoder": {
      "n_layer": 12,
      "n_embd": 1024,
      "n_hidden": 4096,
      "n_head": 16,
      "head_dim": 128
    },
    "decoder": {
      "n_layer": 18,
      "n_embd": 2048,
      "n_hidden": 8192,
      "gqa_query_heads": 16,
      "cross_query_heads": 16,
      "kv_heads": 4,
      "gqa_head_dim": 128,
      "cross_head_dim": 128
    },
    "src_vocab_size": 256,
    "tgt_vocab_size": 1028,
    "dropout": 0.0
  },
  "training": {
    "dtype": "bfloat16",
    "logits_dot_in_fp32": false
  },
  "data": {
    "text_length": 1024,
    "audio_length": 3072,
    "channels": 9,
    "text_pad_value": 0,
    "audio_eos_value": 1024,
    "audio_pad_value": 1025,
    "audio_bos_value": 1026,
    "delay_pattern": [0, 8, 9, 10, 11, 12, 13, 14, 15]
  }
}