{ "d_qk_head": 256, "d_ov_head": 1, "n_qk_heads": 64, "n_ov_heads": 32768, "device": "cuda", "dtype": "torch.float", "virtual_kv_num": 0, "use_z_relu": true, "n_ctx": 1024, "layer": 16, "model_name": "meta-llama/Llama-3.1-8B", "mode": "top_k", "top_k": 128, "avg_norm": { "in": 31.657766342163086, "out": 3.292898416519165 }, "d_model": 4096, "attn_scale": 11.313708498984761, "positional_embedding_type": "rotary", "rotary_scale": 1, "rotary_dim": 256, "rotary_base": 500000.0, "rotary_adjacent_pairs": false, "use_NTK_by_parts_rope": true, "NTK_by_parts_low_freq_factor": 1.0, "NTK_by_parts_high_freq_factor": 4.0, "NTK_by_parts_factor": 8.0, "old_context_len": 8192 }