{
  "model_parameters": {
    "n_layers": 32,
    "vocab_size": 128256,
    "embed_dim": 4096,
    "ffn_hidden_dim": 14336,
    "head_dim": 128,
    "n_kv_heads": 8,
    "rope_theta": 500000.0,
    "rms_norm_eps": 1e-05,
    "attention_mask_value": -100000.0,
    "tie_embedding": false
  },
  "qnn_parameters": {
    "n_hvx_threads": 4
  },
  "graphs": [
    {
      "type": "transformers",
      "start_layer_id": 0,
      "end_layer_id": 8,
      "batch_size": 1,
      "cache_size": 1920,
      "context_size": 2048,
      "graph_name": "batch_1",
      "model_path": "llama3_1_8b_0.bin",
      "kv_path_format": "kv/layer_{layer_id}_{kv_type}_{head_id}.raw",
      "kv_size": 13,
      "x_name": "x",
      "out_name": "out"
    },
    {
      "type": "transformers",
      "start_layer_id": 8,
      "end_layer_id": 16,
      "batch_size": 1,
      "cache_size": 1920,
      "context_size": 2048,
      "graph_name": "batch_1",
      "model_path": "llama3_1_8b_1.bin",
      "kv_path_format": "kv/layer_{layer_id}_{kv_type}_{head_id}.raw",
      "kv_size": 13,
      "x_name": "x",
      "out_name": "out"
    },
    {
      "type": "transformers",
      "start_layer_id": 16,
      "end_layer_id": 24,
      "batch_size": 1,
      "cache_size": 1920,
      "context_size": 2048,
      "graph_name": "batch_1",
      "model_path": "llama3_1_8b_2.bin",
      "kv_path_format": "kv/layer_{layer_id}_{kv_type}_{head_id}.raw",
      "kv_size": 13,
      "x_name": "x",
      "out_name": "out"
    },
    {
      "type": "transformers",
      "start_layer_id": 24,
      "end_layer_id": 32,
      "batch_size": 1,
      "cache_size": 1920,
      "context_size": 2048,
      "graph_name": "batch_1",
      "model_path": "llama3_1_8b_3.bin",
      "kv_path_format": "kv/layer_{layer_id}_{kv_type}_{head_id}.raw",
      "kv_size": 13,
      "x_name": "x",
      "out_name": "out"
    },
    {
      "type": "transformers",
      "start_layer_id": 0,
      "end_layer_id": 8,
      "batch_size": 128,
      "cache_size": 1920,
      "context_size": 2048,
      "graph_name": "batch_128",
      "model_path": "llama3_1_8b_0.bin",
      "kv_path_format": "kv/layer_{layer_id}_{kv_type}_{head_id}.raw",
      "kv_size": 13,
      "x_name": "x",
      "out_name": "out"
    },
    {
      "type": "transformers",
      "start_layer_id": 8,
      "end_layer_id": 16,
      "batch_size": 128,
      "cache_size": 1920,
      "context_size": 2048,
      "graph_name": "batch_128",
      "model_path": "llama3_1_8b_1.bin",
      "kv_path_format": "kv/layer_{layer_id}_{kv_type}_{head_id}.raw",
      "kv_size": 13,
      "x_name": "x",
      "out_name": "out"
    },
    {
      "type": "transformers",
      "start_layer_id": 16,
      "end_layer_id": 24,
      "batch_size": 128,
      "cache_size": 1920,
      "context_size": 2048,
      "graph_name": "batch_128",
      "model_path": "llama3_1_8b_2.bin",
      "kv_path_format": "kv/layer_{layer_id}_{kv_type}_{head_id}.raw",
      "kv_size": 13,
      "x_name": "x",
      "out_name": "out"
    },
    {
      "type": "transformers",
      "start_layer_id": 24,
      "end_layer_id": 32,
      "batch_size": 128,
      "cache_size": 1920,
      "context_size": 2048,
      "graph_name": "batch_128",
      "model_path": "llama3_1_8b_3.bin",
      "kv_path_format": "kv/layer_{layer_id}_{kv_type}_{head_id}.raw",
      "kv_size": 13,
      "x_name": "x",
      "out_name": "out"
    }
  ],
  "embeddings": [
    {
      "graph_name": "batch_1",
      "model_path": "lm_head.bin",
      "batch_size": 1,
      "x_name": "x",
      "out_name": "logits"
    },
    {
      "graph_name": "batch_128",
      "model_path": "lm_head.bin",
      "batch_size": 128,
      "x_name": "x",
      "out_name": "logits"
    }
  ]
}