{
  "d_qk_head": 256,
  "d_ov_head": 1,
  "n_qk_heads": 64,
  "n_ov_heads": 32768,
  "device": "cuda",
  "dtype": "torch.float",
  "virtual_kv_num": 0,
  "use_z_relu": true,
  "n_ctx": 1024,
  "layer": 16,
  "model_name": "meta-llama/Llama-3.1-8B",
  "mode": "top_k",
  "top_k": 128,
  "avg_norm": {
    "in": 31.657766342163086,
    "out": 3.292898416519165
  },
  "d_model": 4096,
  "attn_scale": 11.313708498984761,
  "positional_embedding_type": "rotary",
  "rotary_scale": 1,
  "rotary_dim": 256,
  "rotary_base": 500000.0,
  "rotary_adjacent_pairs": false,
  "use_NTK_by_parts_rope": true,
  "NTK_by_parts_low_freq_factor": 1.0,
  "NTK_by_parts_high_freq_factor": 4.0,
  "NTK_by_parts_factor": 8.0,
  "old_context_len": 8192
}