{ "model_parameters": { "n_layers": 36, "vocab_size": 151936, "embed_dim": 2048, "ffn_hidden_dim": 11008, "head_dim": 128, "n_kv_heads": 2, "rope_theta": 1000000.0, "rms_norm_eps": 1e-06, "attention_mask_value": -50000.0, "tie_embedding": true }, "qnn_parameters": { "n_hvx_threads": 4 }, "graphs": [ { "type": "transformers", "start_layer_id": 0, "end_layer_id": 18, "batch_size": 1, "cache_size": 1920, "context_size": 2048, "graph_name": "batch_1", "model_path": "smallthinker_3b_0.bin", "kv_path_format": "kv/layer_{layer_id}_{kv_type}_{head_id}.raw", "kv_size": 11, "x_name": "x", "out_name": "out" }, { "type": "transformers", "start_layer_id": 18, "end_layer_id": 36, "batch_size": 1, "cache_size": 1920, "context_size": 2048, "graph_name": "batch_1", "model_path": "smallthinker_3b_1.bin", "kv_path_format": "kv/layer_{layer_id}_{kv_type}_{head_id}.raw", "kv_size": 11, "x_name": "x", "out_name": "out" }, { "type": "transformers", "start_layer_id": 0, "end_layer_id": 18, "batch_size": 12, "cache_size": 1920, "context_size": 2048, "graph_name": "batch_12", "model_path": "smallthinker_3b_0.bin", "kv_path_format": "kv/layer_{layer_id}_{kv_type}_{head_id}.raw", "kv_size": 11, "x_name": "x", "out_name": "out" }, { "type": "transformers", "start_layer_id": 18, "end_layer_id": 36, "batch_size": 12, "cache_size": 1920, "context_size": 2048, "graph_name": "batch_12", "model_path": "smallthinker_3b_1.bin", "kv_path_format": "kv/layer_{layer_id}_{kv_type}_{head_id}.raw", "kv_size": 11, "x_name": "x", "out_name": "out" }, { "type": "transformers", "start_layer_id": 0, "end_layer_id": 18, "batch_size": 128, "cache_size": 1920, "context_size": 2048, "graph_name": "batch_128", "model_path": "smallthinker_3b_0.bin", "kv_path_format": "kv/layer_{layer_id}_{kv_type}_{head_id}.raw", "kv_size": 11, "x_name": "x", "out_name": "out" }, { "type": "transformers", "start_layer_id": 18, "end_layer_id": 36, "batch_size": 128, "cache_size": 1920, "context_size": 2048, "graph_name": "batch_128", "model_path": "smallthinker_3b_1.bin", "kv_path_format": "kv/layer_{layer_id}_{kv_type}_{head_id}.raw", "kv_size": 11, "x_name": "x", "out_name": "out" } ], "embeddings": [ { "graph_name": "batch_1", "model_path": "lm_head.bin", "batch_size": 1, "x_name": "x", "out_name": "logits" }, { "graph_name": "batch_12", "model_path": "lm_head.bin", "batch_size": 12, "x_name": "x", "out_name": "logits" }, { "graph_name": "batch_128", "model_path": "lm_head.bin", "batch_size": 128, "x_name": "x", "out_name": "logits" } ] }