{
  "architectures": [
    "ArcticLSTMSpeculatorPreTrainedModel"
  ],
  "base_model_name_or_path": "meta-llama/Llama-3.1-70B-Instruct",
  "base_model_archs": [
    "LlamaForCausalLM",
    "LlamaSwiftKVForCausalLM"
  ],
  "input_hidden_dim": 8192,
  "inner_dim": "4096",
  "proj_dim": "4096",
  "emb_dim": "4096",
  "model_type": "mlp_speculator",
  "n_candidates": 3,
  "n_predict": 3,
  "scale_input": true,
  "tie_weights": true,
  "tie_lstm_embs": true,
  "top_k_tokens_per_head": [
    1,
    1,
    1
  ],
  "torch_dtype": "bfloat16",
  "transformers_version": "4.47.0",
  "vocab_size": 128256,
  "method": "sum_lstm"
}