{ "model_parameters": { "n_layers": 16, "vocab_size": 128256, "embed_dim": 2048, "ffn_hidden_dim": 8192, "head_dim": 64, "n_kv_heads": 8, "rope_theta": 500000.0, "rms_norm_eps": 1e-05, "attention_mask_value": -100000.0, "tie_embedding": true }, "qnn_parameters": { "n_hvx_threads": 4 }, "graphs": [ { "type": "transformers", "start_layer_id": 0, "end_layer_id": 16, "batch_size": 1, "cache_size": 1920, "context_size": 2048, "graph_name": "batch_1", "model_path": "llama3_2_1b_0.bin", "kv_path_format": "kv/layer_{layer_id}_{kv_type}_{head_id}.raw", "kv_size": 13, "x_name": "x", "out_name": "out" }, { "type": "transformers", "start_layer_id": 0, "end_layer_id": 16, "batch_size": 128, "cache_size": 1920, "context_size": 2048, "graph_name": "batch_128", "model_path": "llama3_2_1b_0.bin", "kv_path_format": "kv/layer_{layer_id}_{kv_type}_{head_id}.raw", "kv_size": 13, "x_name": "x", "out_name": "out" } ], "embeddings": [ { "graph_name": "batch_1", "model_path": "lm_head.bin", "batch_size": 1, "x_name": "x", "out_name": "logits" }, { "graph_name": "batch_128", "model_path": "lm_head.bin", "batch_size": 128, "x_name": "x", "out_name": "logits" } ] }