{
  "model_parameters": {
    "n_layers": 32,
    "vocab_size": 128256,
    "embed_dim": 4096,
    "ffn_hidden_dim": 14336,
    "head_dim": 128,
    "n_kv_heads": 8,
    "rope_theta": 500000.0,
    "rms_norm_eps": 1e-05,
    "attention_mask_value": -100000.0,
    "tie_embedding": false
  },
  "qnn_parameters": {
    "n_hvx_threads": 4
  },
  "graphs": [
    {
      "type": "transformers",
      "start_layer_id": 0,
      "end_layer_id": 8,
      "batch_size": 1,
      "cache_size": 1920,
      "context_size": 2048,
      "graph_name": "batch_1",
      "model_path": "llama3_1_8b_0.bin",
      "kv_path_format": "kv/layer_{layer_id}_{kv_type}_{head_id}.raw",
      "kv_size": 13,
      "x_name": "x",
      "out_name": "out"
    },
    {
      "type": "transformers",
      "start_layer_id": 8,
      "end_layer_id": 16,
      "batch_size": 1,
      "cache_size": 1920,
      "context_size": 2048,
      "graph_name": "batch_1",
      "model_path": "llama3_1_8b_1.bin",
      "kv_path_format": "kv/layer_{layer_id}_{kv_type}_{head_id}.raw",
      "kv_size": 13,
      "x_name": "x",
      "out_name": "out"
    },
    {
      "type": "transformers",
      "start_layer_id": 16,
      "end_layer_id": 24,
      "batch_size": 1,
      "cache_size": 1920,
      "context_size": 2048,
      "graph_name": "batch_1",
      "model_path": "llama3_1_8b_2.bin",
      "kv_path_format": "kv/layer_{layer_id}_{kv_type}_{head_id}.raw",
      "kv_size": 13,
      "x_name": "x",
      "out_name": "out"
    },
    {
      "type": "transformers",
      "start_layer_id": 24,
      "end_layer_id": 32,
      "batch_size": 1,
      "cache_size": 1920,
      "context_size": 2048,
      "graph_name": "batch_1",
      "model_path": "llama3_1_8b_3.bin",
      "kv_path_format": "kv/layer_{layer_id}_{kv_type}_{head_id}.raw",
      "kv_size": 13,
      "x_name": "x",
      "out_name": "out"
    },
    {
      "type": "transformers",
      "start_layer_id": 0,
      "end_layer_id": 8,
      "batch_size": 128,
      "cache_size": 1920,
      "context_size": 2048,
      "graph_name": "batch_128",
      "model_path": "llama3_1_8b_0.bin",
      "kv_path_format": "kv/layer_{layer_id}_{kv_type}_{head_id}.raw",
      "kv_size": 13,
      "x_name": "x",
      "out_name": "out"
    },
    {
      "type": "transformers",
      "start_layer_id": 8,
      "end_layer_id": 16,
      "batch_size": 128,
      "cache_size": 1920,
      "context_size": 2048,
      "graph_name": "batch_128",
      "model_path": "llama3_1_8b_1.bin",
      "kv_path_format": "kv/layer_{layer_id}_{kv_type}_{head_id}.raw",
      "kv_size": 13,
      "x_name": "x",
      "out_name": "out"
    },
    {
      "type": "transformers",
      "start_layer_id": 16,
      "end_layer_id": 24,
      "batch_size": 128,
      "cache_size": 1920,
      "context_size": 2048,
      "graph_name": "batch_128",
      "model_path": "llama3_1_8b_2.bin",
      "kv_path_format": "kv/layer_{layer_id}_{kv_type}_{head_id}.raw",
      "kv_size": 13,
      "x_name": "x",
      "out_name": "out"
    },
    {
      "type": "transformers",
      "start_layer_id": 24,
      "end_layer_id": 32,
      "batch_size": 128,
      "cache_size": 1920,
      "context_size": 2048,
      "graph_name": "batch_128",
      "model_path": "llama3_1_8b_3.bin",
      "kv_path_format": "kv/layer_{layer_id}_{kv_type}_{head_id}.raw",
      "kv_size": 13,
      "x_name": "x",
      "out_name": "out"
    }
  ],
  "embeddings": [
    {
      "graph_name": "batch_1",
      "model_path": "lm_head.bin",
      "batch_size": 1,
      "x_name": "x",
      "out_name": "logits"
    },
    {
      "graph_name": "batch_128",
      "model_path": "lm_head.bin",
      "batch_size": 128,
      "x_name": "x",
      "out_name": "logits"
    }
  ]
}