|
{ |
|
"model_parameters": { |
|
"n_layers": 36, |
|
"vocab_size": 151936, |
|
"embed_dim": 2048, |
|
"ffn_hidden_dim": 11008, |
|
"head_dim": 128, |
|
"n_kv_heads": 2, |
|
"rope_theta": 1000000.0, |
|
"rms_norm_eps": 1e-06, |
|
"attention_mask_value": -50000.0, |
|
"tie_embedding": true |
|
}, |
|
"qnn_parameters": { |
|
"n_hvx_threads": 4 |
|
}, |
|
"graphs": [ |
|
{ |
|
"type": "transformers", |
|
"start_layer_id": 0, |
|
"end_layer_id": 18, |
|
"batch_size": 16, |
|
"cache_size": 1920, |
|
"context_size": 2048, |
|
"graph_name": "batch_16", |
|
"model_path": "smallthinker_3b_0.bin", |
|
"kv_path_format": "kv/layer_{layer_id}_{kv_type}_{head_id}.raw", |
|
"kv_size": 11, |
|
"x_name": "x", |
|
"out_name": "out" |
|
}, |
|
{ |
|
"type": "transformers", |
|
"start_layer_id": 18, |
|
"end_layer_id": 36, |
|
"batch_size": 16, |
|
"cache_size": 1920, |
|
"context_size": 2048, |
|
"graph_name": "batch_16", |
|
"model_path": "smallthinker_3b_1.bin", |
|
"kv_path_format": "kv/layer_{layer_id}_{kv_type}_{head_id}.raw", |
|
"kv_size": 11, |
|
"x_name": "x", |
|
"out_name": "out" |
|
}, |
|
{ |
|
"type": "transformers", |
|
"start_layer_id": 0, |
|
"end_layer_id": 18, |
|
"batch_size": 128, |
|
"cache_size": 1920, |
|
"context_size": 2048, |
|
"graph_name": "batch_128", |
|
"model_path": "smallthinker_3b_0.bin", |
|
"kv_path_format": "kv/layer_{layer_id}_{kv_type}_{head_id}.raw", |
|
"kv_size": 11, |
|
"x_name": "x", |
|
"out_name": "out" |
|
}, |
|
{ |
|
"type": "transformers", |
|
"start_layer_id": 18, |
|
"end_layer_id": 36, |
|
"batch_size": 128, |
|
"cache_size": 1920, |
|
"context_size": 2048, |
|
"graph_name": "batch_128", |
|
"model_path": "smallthinker_3b_1.bin", |
|
"kv_path_format": "kv/layer_{layer_id}_{kv_type}_{head_id}.raw", |
|
"kv_size": 11, |
|
"x_name": "x", |
|
"out_name": "out" |
|
} |
|
], |
|
"embeddings": [ |
|
{ |
|
"graph_name": "batch_16", |
|
"model_path": "lm_head.bin", |
|
"batch_size": 16, |
|
"x_name": "x", |
|
"out_name": "logits" |
|
}, |
|
{ |
|
"graph_name": "batch_128", |
|
"model_path": "lm_head.bin", |
|
"batch_size": 128, |
|
"x_name": "x", |
|
"out_name": "logits" |
|
} |
|
] |
|
} |