{ "d_qk_head": 128, "d_ov_head": 1, "n_qk_heads": 24, "n_ov_heads": 6144, "device": "cuda", "dtype": "torch.float", "virtual_kv_num": 0, "use_z_relu": true, "n_ctx": 256, "layer": 10, "model_name": "EleutherAI/pythia-160m", "mode": "top_k", "top_k": 64, "avg_norm": { "in": 144.09291076660156, "out": 3.9614734649658203 }, "d_model": 768, "attn_scale": 8.0, "positional_embedding_type": "rotary", "rotary_scale": 1, "rotary_dim": 128, "rotary_base": 10000, "rotary_adjacent_pairs": false, "use_NTK_by_parts_rope": false, "NTK_by_parts_low_freq_factor": null, "NTK_by_parts_high_freq_factor": null, "NTK_by_parts_factor": null, "old_context_len": null }