{
  "d_qk_head": 128,
  "d_ov_head": 1,
  "n_qk_heads": 24,
  "n_ov_heads": 6144,
  "device": "cuda",
  "dtype": "torch.float",
  "virtual_kv_num": 0,
  "use_z_relu": true,
  "n_ctx": 256,
  "layer": 3,
  "model_name": "EleutherAI/pythia-160m",
  "mode": "top_k",
  "top_k": 64,
  "avg_norm": {
    "in": 29.04673957824707,
    "out": 7.051708698272705
  },
  "d_model": 768,
  "attn_scale": 8.0,
  "positional_embedding_type": "rotary",
  "rotary_scale": 1,
  "rotary_dim": 128,
  "rotary_base": 10000,
  "rotary_adjacent_pairs": false,
  "use_NTK_by_parts_rope": false,
  "NTK_by_parts_low_freq_factor": null,
  "NTK_by_parts_high_freq_factor": null,
  "NTK_by_parts_factor": null,
  "old_context_len": null
}