program(1.3) [buildInfo = dict({{"coremlc-component-MIL", "3500.11.1"}, {"coremlc-version", "3500.21.1"}})] { func infer(tensor causal_mask, tensor current_pos, tensor hidden_states, state> model_model_kv_cache_0, tensor position_ids) { tensor model_model_layers_0_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(3145856))))[name = string("model_model_layers_0_self_attn_q_proj_weight_palettized")]; tensor model_model_layers_0_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(3178688))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(4751616))))[name = string("model_model_layers_0_self_attn_k_proj_weight_palettized")]; tensor model_model_layers_0_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(4768064))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(6340992))))[name = string("model_model_layers_0_self_attn_v_proj_weight_palettized")]; tensor model_model_layers_0_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(6357440))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(15794688))))[name = string("model_model_layers_0_mlp_gate_proj_weight_palettized")]; tensor model_model_layers_0_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(15893056))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(25330304))))[name = string("model_model_layers_0_mlp_up_proj_weight_palettized")]; tensor model_model_layers_0_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(25428672))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(34865920))))[name = string("model_model_layers_0_mlp_down_proj_weight_palettized")]; tensor model_model_layers_1_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(34898752))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(38044544))))[name = string("model_model_layers_1_self_attn_q_proj_weight_palettized")]; tensor model_model_layers_1_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(38077376))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(39650304))))[name = string("model_model_layers_1_self_attn_k_proj_weight_palettized")]; tensor model_model_layers_1_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(39666752))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(41239680))))[name = string("model_model_layers_1_self_attn_v_proj_weight_palettized")]; tensor model_model_layers_1_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(41256128))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(50693376))))[name = string("model_model_layers_1_mlp_gate_proj_weight_palettized")]; tensor model_model_layers_1_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(50791744))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(60228992))))[name = string("model_model_layers_1_mlp_up_proj_weight_palettized")]; tensor model_model_layers_1_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(60327360))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(69764608))))[name = string("model_model_layers_1_mlp_down_proj_weight_palettized")]; tensor model_model_layers_2_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(69797440))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(72943232))))[name = string("model_model_layers_2_self_attn_q_proj_weight_palettized")]; tensor model_model_layers_2_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(72976064))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(74548992))))[name = string("model_model_layers_2_self_attn_k_proj_weight_palettized")]; tensor model_model_layers_2_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(74565440))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(76138368))))[name = string("model_model_layers_2_self_attn_v_proj_weight_palettized")]; tensor model_model_layers_2_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(76154816))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(85592064))))[name = string("model_model_layers_2_mlp_gate_proj_weight_palettized")]; tensor model_model_layers_2_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(85690432))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(95127680))))[name = string("model_model_layers_2_mlp_up_proj_weight_palettized")]; tensor model_model_layers_2_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(95226048))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(104663296))))[name = string("model_model_layers_2_mlp_down_proj_weight_palettized")]; tensor model_model_layers_3_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(104696128))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(107841920))))[name = string("model_model_layers_3_self_attn_q_proj_weight_palettized")]; tensor model_model_layers_3_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(107874752))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(109447680))))[name = string("model_model_layers_3_self_attn_k_proj_weight_palettized")]; tensor model_model_layers_3_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(109464128))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(111037056))))[name = string("model_model_layers_3_self_attn_v_proj_weight_palettized")]; tensor model_model_layers_3_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(111053504))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(120490752))))[name = string("model_model_layers_3_mlp_gate_proj_weight_palettized")]; tensor model_model_layers_3_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(120589120))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(130026368))))[name = string("model_model_layers_3_mlp_up_proj_weight_palettized")]; tensor model_model_layers_3_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(130124736))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(139561984))))[name = string("model_model_layers_3_mlp_down_proj_weight_palettized")]; tensor model_model_layers_4_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(139594816))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(142740608))))[name = string("model_model_layers_4_self_attn_q_proj_weight_palettized")]; tensor model_model_layers_4_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(142773440))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(144346368))))[name = string("model_model_layers_4_self_attn_k_proj_weight_palettized")]; tensor model_model_layers_4_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(144362816))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(145935744))))[name = string("model_model_layers_4_self_attn_v_proj_weight_palettized")]; tensor model_model_layers_4_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(145952192))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(155389440))))[name = string("model_model_layers_4_mlp_gate_proj_weight_palettized")]; tensor model_model_layers_4_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(155487808))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(164925056))))[name = string("model_model_layers_4_mlp_up_proj_weight_palettized")]; tensor model_model_layers_4_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(165023424))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(174460672))))[name = string("model_model_layers_4_mlp_down_proj_weight_palettized")]; tensor model_model_layers_5_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(174493504))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(177639296))))[name = string("model_model_layers_5_self_attn_q_proj_weight_palettized")]; tensor model_model_layers_5_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(177672128))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(179245056))))[name = string("model_model_layers_5_self_attn_k_proj_weight_palettized")]; tensor model_model_layers_5_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(179261504))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(180834432))))[name = string("model_model_layers_5_self_attn_v_proj_weight_palettized")]; tensor model_model_layers_5_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(180850880))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(190288128))))[name = string("model_model_layers_5_mlp_gate_proj_weight_palettized")]; tensor model_model_layers_5_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(190386496))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(199823744))))[name = string("model_model_layers_5_mlp_up_proj_weight_palettized")]; tensor model_model_layers_5_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(199922112))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(209359360))))[name = string("model_model_layers_5_mlp_down_proj_weight_palettized")]; tensor model_model_layers_6_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(209392192))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(212537984))))[name = string("model_model_layers_6_self_attn_q_proj_weight_palettized")]; tensor model_model_layers_6_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(212570816))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(214143744))))[name = string("model_model_layers_6_self_attn_k_proj_weight_palettized")]; tensor model_model_layers_6_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(214160192))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(215733120))))[name = string("model_model_layers_6_self_attn_v_proj_weight_palettized")]; tensor model_model_layers_6_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(215749568))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(225186816))))[name = string("model_model_layers_6_mlp_gate_proj_weight_palettized")]; tensor model_model_layers_6_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(225285184))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(234722432))))[name = string("model_model_layers_6_mlp_up_proj_weight_palettized")]; tensor model_model_layers_6_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(234820800))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(244258048))))[name = string("model_model_layers_6_mlp_down_proj_weight_palettized")]; tensor model_model_layers_7_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(244290880))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(247436672))))[name = string("model_model_layers_7_self_attn_q_proj_weight_palettized")]; tensor model_model_layers_7_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(247469504))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(249042432))))[name = string("model_model_layers_7_self_attn_k_proj_weight_palettized")]; tensor model_model_layers_7_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(249058880))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(250631808))))[name = string("model_model_layers_7_self_attn_v_proj_weight_palettized")]; tensor model_model_layers_7_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(250648256))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(260085504))))[name = string("model_model_layers_7_mlp_gate_proj_weight_palettized")]; tensor model_model_layers_7_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(260183872))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(269621120))))[name = string("model_model_layers_7_mlp_up_proj_weight_palettized")]; tensor model_model_layers_7_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(269719488))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(279156736))))[name = string("model_model_layers_7_mlp_down_proj_weight_palettized")]; tensor model_model_layers_8_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(279189568))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(282335360))))[name = string("model_model_layers_8_self_attn_q_proj_weight_palettized")]; tensor model_model_layers_8_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(282368192))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(283941120))))[name = string("model_model_layers_8_self_attn_k_proj_weight_palettized")]; tensor model_model_layers_8_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(283957568))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(285530496))))[name = string("model_model_layers_8_self_attn_v_proj_weight_palettized")]; tensor model_model_layers_8_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(285546944))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(294984192))))[name = string("model_model_layers_8_mlp_gate_proj_weight_palettized")]; tensor model_model_layers_8_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(295082560))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(304519808))))[name = string("model_model_layers_8_mlp_up_proj_weight_palettized")]; tensor model_model_layers_8_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(304618176))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(314055424))))[name = string("model_model_layers_8_mlp_down_proj_weight_palettized")]; tensor model_model_layers_9_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(314088256))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(317234048))))[name = string("model_model_layers_9_self_attn_q_proj_weight_palettized")]; tensor model_model_layers_9_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(317266880))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(318839808))))[name = string("model_model_layers_9_self_attn_k_proj_weight_palettized")]; tensor model_model_layers_9_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(318856256))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(320429184))))[name = string("model_model_layers_9_self_attn_v_proj_weight_palettized")]; tensor model_model_layers_9_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(320445632))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(329882880))))[name = string("model_model_layers_9_mlp_gate_proj_weight_palettized")]; tensor model_model_layers_9_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(329981248))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(339418496))))[name = string("model_model_layers_9_mlp_up_proj_weight_palettized")]; tensor model_model_layers_9_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(339516864))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(348954112))))[name = string("model_model_layers_9_mlp_down_proj_weight_palettized")]; tensor model_model_layers_10_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(348986944))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(352132736))))[name = string("model_model_layers_10_self_attn_q_proj_weight_palettized")]; tensor model_model_layers_10_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(352165568))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(353738496))))[name = string("model_model_layers_10_self_attn_k_proj_weight_palettized")]; tensor model_model_layers_10_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(353754944))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(355327872))))[name = string("model_model_layers_10_self_attn_v_proj_weight_palettized")]; tensor model_model_layers_10_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(355344320))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(364781568))))[name = string("model_model_layers_10_mlp_gate_proj_weight_palettized")]; tensor model_model_layers_10_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(364879936))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(374317184))))[name = string("model_model_layers_10_mlp_up_proj_weight_palettized")]; tensor model_model_layers_10_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(374415552))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(383852800))))[name = string("model_model_layers_10_mlp_down_proj_weight_palettized")]; tensor model_model_layers_11_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(383885632))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(387031424))))[name = string("model_model_layers_11_self_attn_q_proj_weight_palettized")]; tensor model_model_layers_11_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(387064256))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(388637184))))[name = string("model_model_layers_11_self_attn_k_proj_weight_palettized")]; tensor model_model_layers_11_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(388653632))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(390226560))))[name = string("model_model_layers_11_self_attn_v_proj_weight_palettized")]; tensor model_model_layers_11_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(390243008))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(399680256))))[name = string("model_model_layers_11_mlp_gate_proj_weight_palettized")]; tensor model_model_layers_11_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(399778624))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(409215872))))[name = string("model_model_layers_11_mlp_up_proj_weight_palettized")]; tensor model_model_layers_11_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(409314240))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(418751488))))[name = string("model_model_layers_11_mlp_down_proj_weight_palettized")]; tensor model_model_layers_12_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(418784320))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(421930112))))[name = string("model_model_layers_12_self_attn_q_proj_weight_palettized")]; tensor model_model_layers_12_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(421962944))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(423535872))))[name = string("model_model_layers_12_self_attn_k_proj_weight_palettized")]; tensor model_model_layers_12_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(423552320))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(425125248))))[name = string("model_model_layers_12_self_attn_v_proj_weight_palettized")]; tensor model_model_layers_12_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(425141696))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(434578944))))[name = string("model_model_layers_12_mlp_gate_proj_weight_palettized")]; tensor model_model_layers_12_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(434677312))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(444114560))))[name = string("model_model_layers_12_mlp_up_proj_weight_palettized")]; tensor model_model_layers_12_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(444212928))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(453650176))))[name = string("model_model_layers_12_mlp_down_proj_weight_palettized")]; tensor model_model_layers_13_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(453683008))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(456828800))))[name = string("model_model_layers_13_self_attn_q_proj_weight_palettized")]; tensor model_model_layers_13_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(456861632))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(458434560))))[name = string("model_model_layers_13_self_attn_k_proj_weight_palettized")]; tensor model_model_layers_13_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(458451008))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(460023936))))[name = string("model_model_layers_13_self_attn_v_proj_weight_palettized")]; tensor model_model_layers_13_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(460040384))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(469477632))))[name = string("model_model_layers_13_mlp_gate_proj_weight_palettized")]; tensor model_model_layers_13_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(469576000))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(479013248))))[name = string("model_model_layers_13_mlp_up_proj_weight_palettized")]; tensor model_model_layers_13_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(479111616))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(488548864))))[name = string("model_model_layers_13_mlp_down_proj_weight_palettized")]; tensor model_model_layers_14_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(488581696))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(491727488))))[name = string("model_model_layers_14_self_attn_q_proj_weight_palettized")]; tensor model_model_layers_14_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(491760320))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(493333248))))[name = string("model_model_layers_14_self_attn_k_proj_weight_palettized")]; tensor model_model_layers_14_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(493349696))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(494922624))))[name = string("model_model_layers_14_self_attn_v_proj_weight_palettized")]; tensor model_model_layers_14_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(494939072))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(504376320))))[name = string("model_model_layers_14_mlp_gate_proj_weight_palettized")]; tensor model_model_layers_14_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(504474688))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(513911936))))[name = string("model_model_layers_14_mlp_up_proj_weight_palettized")]; tensor model_model_layers_14_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(514010304))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(523447552))))[name = string("model_model_layers_14_mlp_down_proj_weight_palettized")]; tensor model_model_layers_15_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(523480384))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(526626176))))[name = string("model_model_layers_15_self_attn_q_proj_weight_palettized")]; tensor model_model_layers_15_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(526659008))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(528231936))))[name = string("model_model_layers_15_self_attn_k_proj_weight_palettized")]; tensor model_model_layers_15_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(528248384))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(529821312))))[name = string("model_model_layers_15_self_attn_v_proj_weight_palettized")]; tensor model_model_layers_15_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(529837760))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(539275008))))[name = string("model_model_layers_15_mlp_gate_proj_weight_palettized")]; tensor model_model_layers_15_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(539373376))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(548810624))))[name = string("model_model_layers_15_mlp_up_proj_weight_palettized")]; tensor model_model_layers_15_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(548908992))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(558346240))))[name = string("model_model_layers_15_mlp_down_proj_weight_palettized")]; tensor model_model_layers_16_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(558379072))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(561524864))))[name = string("model_model_layers_16_self_attn_q_proj_weight_palettized")]; tensor model_model_layers_16_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(561557696))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(563130624))))[name = string("model_model_layers_16_self_attn_k_proj_weight_palettized")]; tensor model_model_layers_16_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(563147072))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(564720000))))[name = string("model_model_layers_16_self_attn_v_proj_weight_palettized")]; tensor model_model_layers_16_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(564736448))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(574173696))))[name = string("model_model_layers_16_mlp_gate_proj_weight_palettized")]; tensor model_model_layers_16_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(574272064))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(583709312))))[name = string("model_model_layers_16_mlp_up_proj_weight_palettized")]; tensor model_model_layers_16_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(583807680))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(593244928))))[name = string("model_model_layers_16_mlp_down_proj_weight_palettized")]; tensor model_model_layers_17_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(593277760))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(596423552))))[name = string("model_model_layers_17_self_attn_q_proj_weight_palettized")]; tensor model_model_layers_17_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(596456384))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(598029312))))[name = string("model_model_layers_17_self_attn_k_proj_weight_palettized")]; tensor model_model_layers_17_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(598045760))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(599618688))))[name = string("model_model_layers_17_self_attn_v_proj_weight_palettized")]; tensor model_model_layers_17_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(599635136))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(609072384))))[name = string("model_model_layers_17_mlp_gate_proj_weight_palettized")]; tensor model_model_layers_17_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(609170752))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(618608000))))[name = string("model_model_layers_17_mlp_up_proj_weight_palettized")]; tensor model_model_layers_17_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(618706368))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(628143616))))[name = string("model_model_layers_17_mlp_down_proj_weight_palettized")]; tensor model_model_layers_18_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(628176448))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(631322240))))[name = string("model_model_layers_18_self_attn_q_proj_weight_palettized")]; tensor model_model_layers_18_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(631355072))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(632928000))))[name = string("model_model_layers_18_self_attn_k_proj_weight_palettized")]; tensor model_model_layers_18_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(632944448))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(634517376))))[name = string("model_model_layers_18_self_attn_v_proj_weight_palettized")]; tensor model_model_layers_18_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(634533824))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(643971072))))[name = string("model_model_layers_18_mlp_gate_proj_weight_palettized")]; tensor model_model_layers_18_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(644069440))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(653506688))))[name = string("model_model_layers_18_mlp_up_proj_weight_palettized")]; tensor model_model_layers_18_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(653605056))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(663042304))))[name = string("model_model_layers_18_mlp_down_proj_weight_palettized")]; tensor model_model_layers_19_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(663075136))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(666220928))))[name = string("model_model_layers_19_self_attn_q_proj_weight_palettized")]; tensor model_model_layers_19_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(666253760))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(667826688))))[name = string("model_model_layers_19_self_attn_k_proj_weight_palettized")]; tensor model_model_layers_19_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(667843136))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(669416064))))[name = string("model_model_layers_19_self_attn_v_proj_weight_palettized")]; tensor model_model_layers_19_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(669432512))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(678869760))))[name = string("model_model_layers_19_mlp_gate_proj_weight_palettized")]; tensor model_model_layers_19_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(678968128))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(688405376))))[name = string("model_model_layers_19_mlp_up_proj_weight_palettized")]; tensor model_model_layers_19_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(688503744))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(697940992))))[name = string("model_model_layers_19_mlp_down_proj_weight_palettized")]; tensor model_model_layers_20_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(697973824))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(701119616))))[name = string("model_model_layers_20_self_attn_q_proj_weight_palettized")]; tensor model_model_layers_20_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(701152448))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(702725376))))[name = string("model_model_layers_20_self_attn_k_proj_weight_palettized")]; tensor model_model_layers_20_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(702741824))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(704314752))))[name = string("model_model_layers_20_self_attn_v_proj_weight_palettized")]; tensor model_model_layers_20_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(704331200))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(713768448))))[name = string("model_model_layers_20_mlp_gate_proj_weight_palettized")]; tensor model_model_layers_20_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(713866816))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(723304064))))[name = string("model_model_layers_20_mlp_up_proj_weight_palettized")]; tensor model_model_layers_20_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(723402432))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(732839680))))[name = string("model_model_layers_20_mlp_down_proj_weight_palettized")]; tensor model_model_layers_21_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(732872512))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(736018304))))[name = string("model_model_layers_21_self_attn_q_proj_weight_palettized")]; tensor model_model_layers_21_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(736051136))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(737624064))))[name = string("model_model_layers_21_self_attn_k_proj_weight_palettized")]; tensor model_model_layers_21_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(737640512))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(739213440))))[name = string("model_model_layers_21_self_attn_v_proj_weight_palettized")]; tensor model_model_layers_21_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(739229888))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(748667136))))[name = string("model_model_layers_21_mlp_gate_proj_weight_palettized")]; tensor model_model_layers_21_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(748765504))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(758202752))))[name = string("model_model_layers_21_mlp_up_proj_weight_palettized")]; tensor model_model_layers_21_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(758301120))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(767738368))))[name = string("model_model_layers_21_mlp_down_proj_weight_palettized")]; tensor model_model_layers_22_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(767771200))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(770916992))))[name = string("model_model_layers_22_self_attn_q_proj_weight_palettized")]; tensor model_model_layers_22_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(770949824))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(772522752))))[name = string("model_model_layers_22_self_attn_k_proj_weight_palettized")]; tensor model_model_layers_22_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(772539200))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(774112128))))[name = string("model_model_layers_22_self_attn_v_proj_weight_palettized")]; tensor model_model_layers_22_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(774128576))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(783565824))))[name = string("model_model_layers_22_mlp_gate_proj_weight_palettized")]; tensor model_model_layers_22_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(783664192))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(793101440))))[name = string("model_model_layers_22_mlp_up_proj_weight_palettized")]; tensor model_model_layers_22_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(793199808))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(802637056))))[name = string("model_model_layers_22_mlp_down_proj_weight_palettized")]; tensor model_model_layers_23_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(802669888))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(805815680))))[name = string("model_model_layers_23_self_attn_q_proj_weight_palettized")]; tensor model_model_layers_23_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(805848512))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(807421440))))[name = string("model_model_layers_23_self_attn_k_proj_weight_palettized")]; tensor model_model_layers_23_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(807437888))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(809010816))))[name = string("model_model_layers_23_self_attn_v_proj_weight_palettized")]; tensor model_model_layers_23_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(809027264))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(818464512))))[name = string("model_model_layers_23_mlp_gate_proj_weight_palettized")]; tensor model_model_layers_23_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(818562880))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(828000128))))[name = string("model_model_layers_23_mlp_up_proj_weight_palettized")]; tensor model_model_layers_23_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(828098496))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(837535744))))[name = string("model_model_layers_23_mlp_down_proj_weight_palettized")]; tensor model_model_layers_24_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(837568576))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(840714368))))[name = string("model_model_layers_24_self_attn_q_proj_weight_palettized")]; tensor model_model_layers_24_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(840747200))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(842320128))))[name = string("model_model_layers_24_self_attn_k_proj_weight_palettized")]; tensor model_model_layers_24_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(842336576))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(843909504))))[name = string("model_model_layers_24_self_attn_v_proj_weight_palettized")]; tensor model_model_layers_24_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(843925952))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(853363200))))[name = string("model_model_layers_24_mlp_gate_proj_weight_palettized")]; tensor model_model_layers_24_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(853461568))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(862898816))))[name = string("model_model_layers_24_mlp_up_proj_weight_palettized")]; tensor model_model_layers_24_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(862997184))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(872434432))))[name = string("model_model_layers_24_mlp_down_proj_weight_palettized")]; tensor model_model_layers_25_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(872467264))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(875613056))))[name = string("model_model_layers_25_self_attn_q_proj_weight_palettized")]; tensor model_model_layers_25_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(875645888))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(877218816))))[name = string("model_model_layers_25_self_attn_k_proj_weight_palettized")]; tensor model_model_layers_25_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(877235264))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(878808192))))[name = string("model_model_layers_25_self_attn_v_proj_weight_palettized")]; tensor model_model_layers_25_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(878824640))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(888261888))))[name = string("model_model_layers_25_mlp_gate_proj_weight_palettized")]; tensor model_model_layers_25_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(888360256))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(897797504))))[name = string("model_model_layers_25_mlp_up_proj_weight_palettized")]; tensor model_model_layers_25_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(897895872))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(907333120))))[name = string("model_model_layers_25_mlp_down_proj_weight_palettized")]; tensor model_model_layers_26_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(907365952))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(910511744))))[name = string("model_model_layers_26_self_attn_q_proj_weight_palettized")]; tensor model_model_layers_26_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(910544576))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(912117504))))[name = string("model_model_layers_26_self_attn_k_proj_weight_palettized")]; tensor model_model_layers_26_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(912133952))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(913706880))))[name = string("model_model_layers_26_self_attn_v_proj_weight_palettized")]; tensor model_model_layers_26_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(913723328))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(923160576))))[name = string("model_model_layers_26_mlp_gate_proj_weight_palettized")]; tensor model_model_layers_26_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(923258944))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(932696192))))[name = string("model_model_layers_26_mlp_up_proj_weight_palettized")]; tensor model_model_layers_26_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(932794560))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(942231808))))[name = string("model_model_layers_26_mlp_down_proj_weight_palettized")]; tensor model_model_layers_27_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(942264640))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(945410432))))[name = string("model_model_layers_27_self_attn_q_proj_weight_palettized")]; tensor model_model_layers_27_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(945443264))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(947016192))))[name = string("model_model_layers_27_self_attn_k_proj_weight_palettized")]; tensor model_model_layers_27_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(947032640))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(948605568))))[name = string("model_model_layers_27_self_attn_v_proj_weight_palettized")]; tensor model_model_layers_27_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(948622016))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(958059264))))[name = string("model_model_layers_27_mlp_gate_proj_weight_palettized")]; tensor model_model_layers_27_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(958157632))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(967594880))))[name = string("model_model_layers_27_mlp_up_proj_weight_palettized")]; tensor model_model_layers_27_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(967693248))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(977130496))))[name = string("model_model_layers_27_mlp_down_proj_weight_palettized")]; int32 greater_equal_0_y_0 = const()[name = string("greater_equal_0_y_0"), val = int32(0)]; tensor greater_equal_0 = greater_equal(x = current_pos, y = greater_equal_0_y_0)[name = string("greater_equal_0")]; int32 slice_by_index_0 = const()[name = string("slice_by_index_0"), val = int32(65536)]; tensor add_0 = add(x = current_pos, y = slice_by_index_0)[name = string("add_0")]; tensor select_0 = select(a = current_pos, b = add_0, cond = greater_equal_0)[name = string("select_0")]; int32 var_1503_axis_0 = const()[name = string("op_1503_axis_0"), val = int32(1)]; int32 var_1503_batch_dims_0 = const()[name = string("op_1503_batch_dims_0"), val = int32(0)]; bool var_1503_validate_indices_0 = const()[name = string("op_1503_validate_indices_0"), val = bool(false)]; tensor var_1495_to_fp16 = const()[name = string("op_1495_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(977163328)))]; tensor var_1503_cast_fp16 = gather(axis = var_1503_axis_0, batch_dims = var_1503_batch_dims_0, indices = select_0, validate_indices = var_1503_validate_indices_0, x = var_1495_to_fp16)[name = string("op_1503_cast_fp16")]; tensor var_1508 = const()[name = string("op_1508"), val = tensor([1, 1, 1, -1])]; tensor sin_1_cast_fp16 = reshape(shape = var_1508, x = var_1503_cast_fp16)[name = string("sin_1_cast_fp16")]; int32 var_1518_axis_0 = const()[name = string("op_1518_axis_0"), val = int32(1)]; int32 var_1518_batch_dims_0 = const()[name = string("op_1518_batch_dims_0"), val = int32(0)]; bool var_1518_validate_indices_0 = const()[name = string("op_1518_validate_indices_0"), val = bool(false)]; tensor var_1510_to_fp16 = const()[name = string("op_1510_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(993940608)))]; tensor var_1518_cast_fp16 = gather(axis = var_1518_axis_0, batch_dims = var_1518_batch_dims_0, indices = select_0, validate_indices = var_1518_validate_indices_0, x = var_1510_to_fp16)[name = string("op_1518_cast_fp16")]; tensor var_1523 = const()[name = string("op_1523"), val = tensor([1, 1, 1, -1])]; tensor cos_1_cast_fp16 = reshape(shape = var_1523, x = var_1518_cast_fp16)[name = string("cos_1_cast_fp16")]; tensor mean_1_axes_0 = const()[name = string("mean_1_axes_0"), val = tensor([-1])]; bool mean_1_keep_dims_0 = const()[name = string("mean_1_keep_dims_0"), val = bool(true)]; tensor mean_1_cast_fp16 = reduce_mean(axes = mean_1_axes_0, keep_dims = mean_1_keep_dims_0, x = hidden_states)[name = string("mean_1_cast_fp16")]; tensor input_1_cast_fp16 = sub(x = hidden_states, y = mean_1_cast_fp16)[name = string("input_1_cast_fp16")]; tensor var_1550_axes_0 = const()[name = string("op_1550_axes_0"), val = tensor([-1])]; tensor model_model_layers_0_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_0_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1010717888)))]; fp16 var_1538_to_fp16 = const()[name = string("op_1538_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_1550_cast_fp16 = layer_norm(axes = var_1550_axes_0, epsilon = var_1538_to_fp16, gamma = model_model_layers_0_input_layernorm_weight_to_fp16, x = input_1_cast_fp16)[name = string("op_1550_cast_fp16")]; tensor var_1556 = const()[name = string("op_1556"), val = tensor([0, 2, 1])]; tensor var_1559_axes_0 = const()[name = string("op_1559_axes_0"), val = tensor([2])]; tensor var_1557 = transpose(perm = var_1556, x = var_1550_cast_fp16)[name = string("transpose_167")]; tensor var_1559 = expand_dims(axes = var_1559_axes_0, x = var_1557)[name = string("op_1559")]; string var_1575_pad_type_0 = const()[name = string("op_1575_pad_type_0"), val = string("valid")]; tensor var_1575_strides_0 = const()[name = string("op_1575_strides_0"), val = tensor([1, 1])]; tensor var_1575_pad_0 = const()[name = string("op_1575_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_1575_dilations_0 = const()[name = string("op_1575_dilations_0"), val = tensor([1, 1])]; int32 var_1575_groups_0 = const()[name = string("op_1575_groups_0"), val = int32(1)]; tensor var_1575 = conv(dilations = var_1575_dilations_0, groups = var_1575_groups_0, pad = var_1575_pad_0, pad_type = var_1575_pad_type_0, strides = var_1575_strides_0, weight = model_model_layers_0_self_attn_q_proj_weight_palettized, x = var_1559)[name = string("op_1575")]; tensor var_1580 = const()[name = string("op_1580"), val = tensor([1, 16, 1, 128])]; tensor var_1581 = reshape(shape = var_1580, x = var_1575)[name = string("op_1581")]; string var_1597_pad_type_0 = const()[name = string("op_1597_pad_type_0"), val = string("valid")]; tensor var_1597_strides_0 = const()[name = string("op_1597_strides_0"), val = tensor([1, 1])]; tensor var_1597_pad_0 = const()[name = string("op_1597_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_1597_dilations_0 = const()[name = string("op_1597_dilations_0"), val = tensor([1, 1])]; int32 var_1597_groups_0 = const()[name = string("op_1597_groups_0"), val = int32(1)]; tensor var_1597 = conv(dilations = var_1597_dilations_0, groups = var_1597_groups_0, pad = var_1597_pad_0, pad_type = var_1597_pad_type_0, strides = var_1597_strides_0, weight = model_model_layers_0_self_attn_k_proj_weight_palettized, x = var_1559)[name = string("op_1597")]; tensor var_1602 = const()[name = string("op_1602"), val = tensor([1, 8, 1, 128])]; tensor var_1603 = reshape(shape = var_1602, x = var_1597)[name = string("op_1603")]; string var_1619_pad_type_0 = const()[name = string("op_1619_pad_type_0"), val = string("valid")]; tensor var_1619_strides_0 = const()[name = string("op_1619_strides_0"), val = tensor([1, 1])]; tensor var_1619_pad_0 = const()[name = string("op_1619_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_1619_dilations_0 = const()[name = string("op_1619_dilations_0"), val = tensor([1, 1])]; int32 var_1619_groups_0 = const()[name = string("op_1619_groups_0"), val = int32(1)]; tensor var_1619 = conv(dilations = var_1619_dilations_0, groups = var_1619_groups_0, pad = var_1619_pad_0, pad_type = var_1619_pad_type_0, strides = var_1619_strides_0, weight = model_model_layers_0_self_attn_v_proj_weight_palettized, x = var_1559)[name = string("op_1619")]; tensor var_1624 = const()[name = string("op_1624"), val = tensor([1, 8, 1, 128])]; tensor var_1625 = reshape(shape = var_1624, x = var_1619)[name = string("op_1625")]; tensor mean_3_axes_0 = const()[name = string("mean_3_axes_0"), val = tensor([-1])]; bool mean_3_keep_dims_0 = const()[name = string("mean_3_keep_dims_0"), val = bool(true)]; tensor mean_3 = reduce_mean(axes = mean_3_axes_0, keep_dims = mean_3_keep_dims_0, x = var_1581)[name = string("mean_3")]; tensor input_5 = sub(x = var_1581, y = mean_3)[name = string("input_5")]; tensor var_1646_axes_0 = const()[name = string("op_1646_axes_0"), val = tensor([-1])]; tensor model_model_layers_0_self_attn_q_norm_weight_to_fp16 = const()[name = string("model_model_layers_0_self_attn_q_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1010722048)))]; fp16 var_1634_to_fp16 = const()[name = string("op_1634_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_1646_cast_fp16 = layer_norm(axes = var_1646_axes_0, epsilon = var_1634_to_fp16, gamma = model_model_layers_0_self_attn_q_norm_weight_to_fp16, x = input_5)[name = string("op_1646_cast_fp16")]; tensor mean_5_axes_0 = const()[name = string("mean_5_axes_0"), val = tensor([-1])]; bool mean_5_keep_dims_0 = const()[name = string("mean_5_keep_dims_0"), val = bool(true)]; tensor mean_5 = reduce_mean(axes = mean_5_axes_0, keep_dims = mean_5_keep_dims_0, x = var_1603)[name = string("mean_5")]; tensor input_7 = sub(x = var_1603, y = mean_5)[name = string("input_7")]; tensor var_1664_axes_0 = const()[name = string("op_1664_axes_0"), val = tensor([-1])]; tensor model_model_layers_0_self_attn_k_norm_weight_to_fp16 = const()[name = string("model_model_layers_0_self_attn_k_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1010722368)))]; fp16 var_1652_to_fp16 = const()[name = string("op_1652_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_1664_cast_fp16 = layer_norm(axes = var_1664_axes_0, epsilon = var_1652_to_fp16, gamma = model_model_layers_0_self_attn_k_norm_weight_to_fp16, x = input_7)[name = string("op_1664_cast_fp16")]; tensor var_1667 = mul(x = var_1646_cast_fp16, y = cos_1_cast_fp16)[name = string("op_1667")]; tensor x1_1_begin_0 = const()[name = string("x1_1_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_1_end_0 = const()[name = string("x1_1_end_0"), val = tensor([1, 16, 1, 64])]; tensor x1_1_end_mask_0 = const()[name = string("x1_1_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_1 = slice_by_index(begin = x1_1_begin_0, end = x1_1_end_0, end_mask = x1_1_end_mask_0, x = var_1646_cast_fp16)[name = string("x1_1")]; tensor x2_1_begin_0 = const()[name = string("x2_1_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_1_end_0 = const()[name = string("x2_1_end_0"), val = tensor([1, 16, 1, 128])]; tensor x2_1_end_mask_0 = const()[name = string("x2_1_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_1 = slice_by_index(begin = x2_1_begin_0, end = x2_1_end_0, end_mask = x2_1_end_mask_0, x = var_1646_cast_fp16)[name = string("x2_1")]; fp16 const_5_promoted = const()[name = string("const_5_promoted"), val = fp16(-0x1p+0)]; tensor var_1688 = mul(x = x2_1, y = const_5_promoted)[name = string("op_1688")]; int32 var_1690 = const()[name = string("op_1690"), val = int32(-1)]; bool var_1691_interleave_0 = const()[name = string("op_1691_interleave_0"), val = bool(false)]; tensor var_1691 = concat(axis = var_1690, interleave = var_1691_interleave_0, values = (var_1688, x1_1))[name = string("op_1691")]; tensor var_1692 = mul(x = var_1691, y = sin_1_cast_fp16)[name = string("op_1692")]; tensor query_states_1 = add(x = var_1667, y = var_1692)[name = string("query_states_1")]; tensor var_1695 = mul(x = var_1664_cast_fp16, y = cos_1_cast_fp16)[name = string("op_1695")]; tensor x1_3_begin_0 = const()[name = string("x1_3_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_3_end_0 = const()[name = string("x1_3_end_0"), val = tensor([1, 8, 1, 64])]; tensor x1_3_end_mask_0 = const()[name = string("x1_3_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_3 = slice_by_index(begin = x1_3_begin_0, end = x1_3_end_0, end_mask = x1_3_end_mask_0, x = var_1664_cast_fp16)[name = string("x1_3")]; tensor x2_3_begin_0 = const()[name = string("x2_3_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_3_end_0 = const()[name = string("x2_3_end_0"), val = tensor([1, 8, 1, 128])]; tensor x2_3_end_mask_0 = const()[name = string("x2_3_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_3 = slice_by_index(begin = x2_3_begin_0, end = x2_3_end_0, end_mask = x2_3_end_mask_0, x = var_1664_cast_fp16)[name = string("x2_3")]; fp16 const_8_promoted = const()[name = string("const_8_promoted"), val = fp16(-0x1p+0)]; tensor var_1716 = mul(x = x2_3, y = const_8_promoted)[name = string("op_1716")]; int32 var_1718 = const()[name = string("op_1718"), val = int32(-1)]; bool var_1719_interleave_0 = const()[name = string("op_1719_interleave_0"), val = bool(false)]; tensor var_1719 = concat(axis = var_1718, interleave = var_1719_interleave_0, values = (var_1716, x1_3))[name = string("op_1719")]; tensor var_1720 = mul(x = var_1719, y = sin_1_cast_fp16)[name = string("op_1720")]; tensor key_states_1 = add(x = var_1695, y = var_1720)[name = string("key_states_1")]; int32 var_1724 = const()[name = string("op_1724"), val = int32(1)]; tensor var_1725 = add(x = current_pos, y = var_1724)[name = string("op_1725")]; tensor read_state_0 = read_state(input = model_model_kv_cache_0)[name = string("read_state_0")]; tensor expand_dims_0 = const()[name = string("expand_dims_0"), val = tensor([0])]; tensor expand_dims_1 = const()[name = string("expand_dims_1"), val = tensor([0])]; tensor expand_dims_3 = const()[name = string("expand_dims_3"), val = tensor([0])]; tensor expand_dims_4 = const()[name = string("expand_dims_4"), val = tensor([1])]; int32 concat_2_axis_0 = const()[name = string("concat_2_axis_0"), val = int32(0)]; bool concat_2_interleave_0 = const()[name = string("concat_2_interleave_0"), val = bool(false)]; tensor concat_2 = concat(axis = concat_2_axis_0, interleave = concat_2_interleave_0, values = (expand_dims_0, expand_dims_1, current_pos, expand_dims_3))[name = string("concat_2")]; tensor concat_3_values1_0 = const()[name = string("concat_3_values1_0"), val = tensor([0])]; tensor concat_3_values3_0 = const()[name = string("concat_3_values3_0"), val = tensor([0])]; int32 concat_3_axis_0 = const()[name = string("concat_3_axis_0"), val = int32(0)]; bool concat_3_interleave_0 = const()[name = string("concat_3_interleave_0"), val = bool(false)]; tensor concat_3 = concat(axis = concat_3_axis_0, interleave = concat_3_interleave_0, values = (expand_dims_4, concat_3_values1_0, var_1725, concat_3_values3_0))[name = string("concat_3")]; tensor model_model_kv_cache_0_internal_tensor_assign_1_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_1_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_1_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_1_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_2, begin_mask = model_model_kv_cache_0_internal_tensor_assign_1_begin_mask_0, end = concat_3, end_mask = model_model_kv_cache_0_internal_tensor_assign_1_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_1_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_1_stride_0, update = key_states_1, x = read_state_0)[name = string("model_model_kv_cache_0_internal_tensor_assign_1_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_1_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_0_write_state")]; tensor coreml_update_state_56 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_0")]; tensor expand_dims_6 = const()[name = string("expand_dims_6"), val = tensor([28])]; tensor expand_dims_7 = const()[name = string("expand_dims_7"), val = tensor([0])]; tensor expand_dims_9 = const()[name = string("expand_dims_9"), val = tensor([0])]; tensor expand_dims_10 = const()[name = string("expand_dims_10"), val = tensor([29])]; int32 concat_6_axis_0 = const()[name = string("concat_6_axis_0"), val = int32(0)]; bool concat_6_interleave_0 = const()[name = string("concat_6_interleave_0"), val = bool(false)]; tensor concat_6 = concat(axis = concat_6_axis_0, interleave = concat_6_interleave_0, values = (expand_dims_6, expand_dims_7, current_pos, expand_dims_9))[name = string("concat_6")]; tensor concat_7_values1_0 = const()[name = string("concat_7_values1_0"), val = tensor([0])]; tensor concat_7_values3_0 = const()[name = string("concat_7_values3_0"), val = tensor([0])]; int32 concat_7_axis_0 = const()[name = string("concat_7_axis_0"), val = int32(0)]; bool concat_7_interleave_0 = const()[name = string("concat_7_interleave_0"), val = bool(false)]; tensor concat_7 = concat(axis = concat_7_axis_0, interleave = concat_7_interleave_0, values = (expand_dims_10, concat_7_values1_0, var_1725, concat_7_values3_0))[name = string("concat_7")]; tensor model_model_kv_cache_0_internal_tensor_assign_2_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_2_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_2_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_2_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_2_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_2_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_2_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_2_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_2_cast_fp16 = slice_update(begin = concat_6, begin_mask = model_model_kv_cache_0_internal_tensor_assign_2_begin_mask_0, end = concat_7, end_mask = model_model_kv_cache_0_internal_tensor_assign_2_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_2_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_2_stride_0, update = var_1625, x = coreml_update_state_56)[name = string("model_model_kv_cache_0_internal_tensor_assign_2_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_2_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_1_write_state")]; tensor coreml_update_state_57 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_1")]; tensor var_1775_begin_0 = const()[name = string("op_1775_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_1775_end_0 = const()[name = string("op_1775_end_0"), val = tensor([1, 8, 1024, 128])]; tensor var_1775_end_mask_0 = const()[name = string("op_1775_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_1775_cast_fp16 = slice_by_index(begin = var_1775_begin_0, end = var_1775_end_0, end_mask = var_1775_end_mask_0, x = coreml_update_state_57)[name = string("op_1775_cast_fp16")]; tensor K_layer_cache_1_axes_0 = const()[name = string("K_layer_cache_1_axes_0"), val = tensor([0])]; tensor K_layer_cache_1_cast_fp16 = squeeze(axes = K_layer_cache_1_axes_0, x = var_1775_cast_fp16)[name = string("K_layer_cache_1_cast_fp16")]; tensor var_1782_begin_0 = const()[name = string("op_1782_begin_0"), val = tensor([28, 0, 0, 0])]; tensor var_1782_end_0 = const()[name = string("op_1782_end_0"), val = tensor([29, 8, 1024, 128])]; tensor var_1782_end_mask_0 = const()[name = string("op_1782_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_1782_cast_fp16 = slice_by_index(begin = var_1782_begin_0, end = var_1782_end_0, end_mask = var_1782_end_mask_0, x = coreml_update_state_57)[name = string("op_1782_cast_fp16")]; tensor V_layer_cache_1_axes_0 = const()[name = string("V_layer_cache_1_axes_0"), val = tensor([0])]; tensor V_layer_cache_1_cast_fp16 = squeeze(axes = V_layer_cache_1_axes_0, x = var_1782_cast_fp16)[name = string("V_layer_cache_1_cast_fp16")]; tensor x_7_axes_0 = const()[name = string("x_7_axes_0"), val = tensor([1])]; tensor x_7_cast_fp16 = expand_dims(axes = x_7_axes_0, x = K_layer_cache_1_cast_fp16)[name = string("x_7_cast_fp16")]; tensor var_1819 = const()[name = string("op_1819"), val = tensor([1, 2, 1, 1])]; tensor x_9_cast_fp16 = tile(reps = var_1819, x = x_7_cast_fp16)[name = string("x_9_cast_fp16")]; tensor var_1831 = const()[name = string("op_1831"), val = tensor([1, -1, 1024, 128])]; tensor key_states_3_cast_fp16 = reshape(shape = var_1831, x = x_9_cast_fp16)[name = string("key_states_3_cast_fp16")]; tensor x_13_axes_0 = const()[name = string("x_13_axes_0"), val = tensor([1])]; tensor x_13_cast_fp16 = expand_dims(axes = x_13_axes_0, x = V_layer_cache_1_cast_fp16)[name = string("x_13_cast_fp16")]; tensor var_1839 = const()[name = string("op_1839"), val = tensor([1, 2, 1, 1])]; tensor x_15_cast_fp16 = tile(reps = var_1839, x = x_13_cast_fp16)[name = string("x_15_cast_fp16")]; tensor var_1851 = const()[name = string("op_1851"), val = tensor([1, -1, 1024, 128])]; tensor value_states_3_cast_fp16 = reshape(shape = var_1851, x = x_15_cast_fp16)[name = string("value_states_3_cast_fp16")]; bool var_1866_transpose_x_1 = const()[name = string("op_1866_transpose_x_1"), val = bool(false)]; bool var_1866_transpose_y_1 = const()[name = string("op_1866_transpose_y_1"), val = bool(true)]; tensor var_1866 = matmul(transpose_x = var_1866_transpose_x_1, transpose_y = var_1866_transpose_y_1, x = query_states_1, y = key_states_3_cast_fp16)[name = string("op_1866")]; fp16 var_1867_to_fp16 = const()[name = string("op_1867_to_fp16"), val = fp16(0x1.6ap-4)]; tensor attn_weights_1_cast_fp16 = mul(x = var_1866, y = var_1867_to_fp16)[name = string("attn_weights_1_cast_fp16")]; tensor attn_weights_3_cast_fp16 = add(x = attn_weights_1_cast_fp16, y = causal_mask)[name = string("attn_weights_3_cast_fp16")]; int32 var_1902 = const()[name = string("op_1902"), val = int32(-1)]; tensor attn_weights_5_cast_fp16 = softmax(axis = var_1902, x = attn_weights_3_cast_fp16)[name = string("attn_weights_5_cast_fp16")]; bool attn_output_1_transpose_x_0 = const()[name = string("attn_output_1_transpose_x_0"), val = bool(false)]; bool attn_output_1_transpose_y_0 = const()[name = string("attn_output_1_transpose_y_0"), val = bool(false)]; tensor attn_output_1_cast_fp16 = matmul(transpose_x = attn_output_1_transpose_x_0, transpose_y = attn_output_1_transpose_y_0, x = attn_weights_5_cast_fp16, y = value_states_3_cast_fp16)[name = string("attn_output_1_cast_fp16")]; tensor var_1913_perm_0 = const()[name = string("op_1913_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_1917 = const()[name = string("op_1917"), val = tensor([1, 1, 2048])]; tensor var_1913_cast_fp16 = transpose(perm = var_1913_perm_0, x = attn_output_1_cast_fp16)[name = string("transpose_166")]; tensor attn_output_5_cast_fp16 = reshape(shape = var_1917, x = var_1913_cast_fp16)[name = string("attn_output_5_cast_fp16")]; tensor var_1922 = const()[name = string("op_1922"), val = tensor([0, 2, 1])]; string var_1938_pad_type_0 = const()[name = string("op_1938_pad_type_0"), val = string("valid")]; int32 var_1938_groups_0 = const()[name = string("op_1938_groups_0"), val = int32(1)]; tensor var_1938_strides_0 = const()[name = string("op_1938_strides_0"), val = tensor([1])]; tensor var_1938_pad_0 = const()[name = string("op_1938_pad_0"), val = tensor([0, 0])]; tensor var_1938_dilations_0 = const()[name = string("op_1938_dilations_0"), val = tensor([1])]; tensor squeeze_0_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1010722688))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1013868480))))[name = string("squeeze_0_cast_fp16_to_fp32_to_fp16_palettized")]; tensor var_1923_cast_fp16 = transpose(perm = var_1922, x = attn_output_5_cast_fp16)[name = string("transpose_165")]; tensor var_1938_cast_fp16 = conv(dilations = var_1938_dilations_0, groups = var_1938_groups_0, pad = var_1938_pad_0, pad_type = var_1938_pad_type_0, strides = var_1938_strides_0, weight = squeeze_0_cast_fp16_to_fp32_to_fp16_palettized, x = var_1923_cast_fp16)[name = string("op_1938_cast_fp16")]; tensor var_1942 = const()[name = string("op_1942"), val = tensor([0, 2, 1])]; tensor attn_output_9_cast_fp16 = transpose(perm = var_1942, x = var_1938_cast_fp16)[name = string("transpose_164")]; tensor hidden_states_5_cast_fp16 = add(x = hidden_states, y = attn_output_9_cast_fp16)[name = string("hidden_states_5_cast_fp16")]; tensor mean_7_axes_0 = const()[name = string("mean_7_axes_0"), val = tensor([-1])]; bool mean_7_keep_dims_0 = const()[name = string("mean_7_keep_dims_0"), val = bool(true)]; tensor mean_7_cast_fp16 = reduce_mean(axes = mean_7_axes_0, keep_dims = mean_7_keep_dims_0, x = hidden_states_5_cast_fp16)[name = string("mean_7_cast_fp16")]; tensor input_11_cast_fp16 = sub(x = hidden_states_5_cast_fp16, y = mean_7_cast_fp16)[name = string("input_11_cast_fp16")]; tensor var_1961_axes_0 = const()[name = string("op_1961_axes_0"), val = tensor([-1])]; tensor model_model_layers_0_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_0_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1013901312)))]; fp16 var_1949_to_fp16 = const()[name = string("op_1949_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_1961_cast_fp16 = layer_norm(axes = var_1961_axes_0, epsilon = var_1949_to_fp16, gamma = model_model_layers_0_post_attention_layernorm_weight_to_fp16, x = input_11_cast_fp16)[name = string("op_1961_cast_fp16")]; tensor var_1975 = const()[name = string("op_1975"), val = tensor([0, 2, 1])]; tensor input_13_axes_0 = const()[name = string("input_13_axes_0"), val = tensor([2])]; tensor var_1976 = transpose(perm = var_1975, x = var_1961_cast_fp16)[name = string("transpose_163")]; tensor input_13 = expand_dims(axes = input_13_axes_0, x = var_1976)[name = string("input_13")]; string input_15_pad_type_0 = const()[name = string("input_15_pad_type_0"), val = string("valid")]; tensor input_15_strides_0 = const()[name = string("input_15_strides_0"), val = tensor([1, 1])]; tensor input_15_pad_0 = const()[name = string("input_15_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_15_dilations_0 = const()[name = string("input_15_dilations_0"), val = tensor([1, 1])]; int32 input_15_groups_0 = const()[name = string("input_15_groups_0"), val = int32(1)]; tensor input_15 = conv(dilations = input_15_dilations_0, groups = input_15_groups_0, pad = input_15_pad_0, pad_type = input_15_pad_type_0, strides = input_15_strides_0, weight = model_model_layers_0_mlp_gate_proj_weight_palettized, x = input_13)[name = string("input_15")]; string b_1_pad_type_0 = const()[name = string("b_1_pad_type_0"), val = string("valid")]; tensor b_1_strides_0 = const()[name = string("b_1_strides_0"), val = tensor([1, 1])]; tensor b_1_pad_0 = const()[name = string("b_1_pad_0"), val = tensor([0, 0, 0, 0])]; tensor b_1_dilations_0 = const()[name = string("b_1_dilations_0"), val = tensor([1, 1])]; int32 b_1_groups_0 = const()[name = string("b_1_groups_0"), val = int32(1)]; tensor b_1 = conv(dilations = b_1_dilations_0, groups = b_1_groups_0, pad = b_1_pad_0, pad_type = b_1_pad_type_0, strides = b_1_strides_0, weight = model_model_layers_0_mlp_up_proj_weight_palettized, x = input_13)[name = string("b_1")]; tensor c_1 = silu(x = input_15)[name = string("c_1")]; tensor input_17 = mul(x = c_1, y = b_1)[name = string("input_17")]; string e_1_pad_type_0 = const()[name = string("e_1_pad_type_0"), val = string("valid")]; tensor e_1_strides_0 = const()[name = string("e_1_strides_0"), val = tensor([1, 1])]; tensor e_1_pad_0 = const()[name = string("e_1_pad_0"), val = tensor([0, 0, 0, 0])]; tensor e_1_dilations_0 = const()[name = string("e_1_dilations_0"), val = tensor([1, 1])]; int32 e_1_groups_0 = const()[name = string("e_1_groups_0"), val = int32(1)]; tensor e_1 = conv(dilations = e_1_dilations_0, groups = e_1_groups_0, pad = e_1_pad_0, pad_type = e_1_pad_type_0, strides = e_1_strides_0, weight = model_model_layers_0_mlp_down_proj_weight_palettized, x = input_17)[name = string("e_1")]; tensor var_1998_axes_0 = const()[name = string("op_1998_axes_0"), val = tensor([2])]; tensor var_1998 = squeeze(axes = var_1998_axes_0, x = e_1)[name = string("op_1998")]; tensor var_1999 = const()[name = string("op_1999"), val = tensor([0, 2, 1])]; tensor var_2000 = transpose(perm = var_1999, x = var_1998)[name = string("transpose_162")]; tensor hidden_states_7_cast_fp16 = add(x = hidden_states_5_cast_fp16, y = var_2000)[name = string("hidden_states_7_cast_fp16")]; tensor mean_9_axes_0 = const()[name = string("mean_9_axes_0"), val = tensor([-1])]; bool mean_9_keep_dims_0 = const()[name = string("mean_9_keep_dims_0"), val = bool(true)]; tensor mean_9_cast_fp16 = reduce_mean(axes = mean_9_axes_0, keep_dims = mean_9_keep_dims_0, x = hidden_states_7_cast_fp16)[name = string("mean_9_cast_fp16")]; tensor input_19_cast_fp16 = sub(x = hidden_states_7_cast_fp16, y = mean_9_cast_fp16)[name = string("input_19_cast_fp16")]; tensor var_2018_axes_0 = const()[name = string("op_2018_axes_0"), val = tensor([-1])]; tensor model_model_layers_1_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_1_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1013905472)))]; fp16 var_2006_to_fp16 = const()[name = string("op_2006_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_2018_cast_fp16 = layer_norm(axes = var_2018_axes_0, epsilon = var_2006_to_fp16, gamma = model_model_layers_1_input_layernorm_weight_to_fp16, x = input_19_cast_fp16)[name = string("op_2018_cast_fp16")]; tensor var_2024 = const()[name = string("op_2024"), val = tensor([0, 2, 1])]; tensor var_2027_axes_0 = const()[name = string("op_2027_axes_0"), val = tensor([2])]; tensor var_2025 = transpose(perm = var_2024, x = var_2018_cast_fp16)[name = string("transpose_161")]; tensor var_2027 = expand_dims(axes = var_2027_axes_0, x = var_2025)[name = string("op_2027")]; string var_2043_pad_type_0 = const()[name = string("op_2043_pad_type_0"), val = string("valid")]; tensor var_2043_strides_0 = const()[name = string("op_2043_strides_0"), val = tensor([1, 1])]; tensor var_2043_pad_0 = const()[name = string("op_2043_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_2043_dilations_0 = const()[name = string("op_2043_dilations_0"), val = tensor([1, 1])]; int32 var_2043_groups_0 = const()[name = string("op_2043_groups_0"), val = int32(1)]; tensor var_2043 = conv(dilations = var_2043_dilations_0, groups = var_2043_groups_0, pad = var_2043_pad_0, pad_type = var_2043_pad_type_0, strides = var_2043_strides_0, weight = model_model_layers_1_self_attn_q_proj_weight_palettized, x = var_2027)[name = string("op_2043")]; tensor var_2048 = const()[name = string("op_2048"), val = tensor([1, 16, 1, 128])]; tensor var_2049 = reshape(shape = var_2048, x = var_2043)[name = string("op_2049")]; string var_2065_pad_type_0 = const()[name = string("op_2065_pad_type_0"), val = string("valid")]; tensor var_2065_strides_0 = const()[name = string("op_2065_strides_0"), val = tensor([1, 1])]; tensor var_2065_pad_0 = const()[name = string("op_2065_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_2065_dilations_0 = const()[name = string("op_2065_dilations_0"), val = tensor([1, 1])]; int32 var_2065_groups_0 = const()[name = string("op_2065_groups_0"), val = int32(1)]; tensor var_2065 = conv(dilations = var_2065_dilations_0, groups = var_2065_groups_0, pad = var_2065_pad_0, pad_type = var_2065_pad_type_0, strides = var_2065_strides_0, weight = model_model_layers_1_self_attn_k_proj_weight_palettized, x = var_2027)[name = string("op_2065")]; tensor var_2070 = const()[name = string("op_2070"), val = tensor([1, 8, 1, 128])]; tensor var_2071 = reshape(shape = var_2070, x = var_2065)[name = string("op_2071")]; string var_2087_pad_type_0 = const()[name = string("op_2087_pad_type_0"), val = string("valid")]; tensor var_2087_strides_0 = const()[name = string("op_2087_strides_0"), val = tensor([1, 1])]; tensor var_2087_pad_0 = const()[name = string("op_2087_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_2087_dilations_0 = const()[name = string("op_2087_dilations_0"), val = tensor([1, 1])]; int32 var_2087_groups_0 = const()[name = string("op_2087_groups_0"), val = int32(1)]; tensor var_2087 = conv(dilations = var_2087_dilations_0, groups = var_2087_groups_0, pad = var_2087_pad_0, pad_type = var_2087_pad_type_0, strides = var_2087_strides_0, weight = model_model_layers_1_self_attn_v_proj_weight_palettized, x = var_2027)[name = string("op_2087")]; tensor var_2092 = const()[name = string("op_2092"), val = tensor([1, 8, 1, 128])]; tensor var_2093 = reshape(shape = var_2092, x = var_2087)[name = string("op_2093")]; tensor mean_11_axes_0 = const()[name = string("mean_11_axes_0"), val = tensor([-1])]; bool mean_11_keep_dims_0 = const()[name = string("mean_11_keep_dims_0"), val = bool(true)]; tensor mean_11 = reduce_mean(axes = mean_11_axes_0, keep_dims = mean_11_keep_dims_0, x = var_2049)[name = string("mean_11")]; tensor input_23 = sub(x = var_2049, y = mean_11)[name = string("input_23")]; tensor var_2114_axes_0 = const()[name = string("op_2114_axes_0"), val = tensor([-1])]; tensor model_model_layers_1_self_attn_q_norm_weight_to_fp16 = const()[name = string("model_model_layers_1_self_attn_q_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1013909632)))]; fp16 var_2102_to_fp16 = const()[name = string("op_2102_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_2114_cast_fp16 = layer_norm(axes = var_2114_axes_0, epsilon = var_2102_to_fp16, gamma = model_model_layers_1_self_attn_q_norm_weight_to_fp16, x = input_23)[name = string("op_2114_cast_fp16")]; tensor mean_13_axes_0 = const()[name = string("mean_13_axes_0"), val = tensor([-1])]; bool mean_13_keep_dims_0 = const()[name = string("mean_13_keep_dims_0"), val = bool(true)]; tensor mean_13 = reduce_mean(axes = mean_13_axes_0, keep_dims = mean_13_keep_dims_0, x = var_2071)[name = string("mean_13")]; tensor input_25 = sub(x = var_2071, y = mean_13)[name = string("input_25")]; tensor var_2132_axes_0 = const()[name = string("op_2132_axes_0"), val = tensor([-1])]; tensor model_model_layers_1_self_attn_k_norm_weight_to_fp16 = const()[name = string("model_model_layers_1_self_attn_k_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1013909952)))]; fp16 var_2120_to_fp16 = const()[name = string("op_2120_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_2132_cast_fp16 = layer_norm(axes = var_2132_axes_0, epsilon = var_2120_to_fp16, gamma = model_model_layers_1_self_attn_k_norm_weight_to_fp16, x = input_25)[name = string("op_2132_cast_fp16")]; tensor var_2135 = mul(x = var_2114_cast_fp16, y = cos_1_cast_fp16)[name = string("op_2135")]; tensor x1_5_begin_0 = const()[name = string("x1_5_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_5_end_0 = const()[name = string("x1_5_end_0"), val = tensor([1, 16, 1, 64])]; tensor x1_5_end_mask_0 = const()[name = string("x1_5_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_5 = slice_by_index(begin = x1_5_begin_0, end = x1_5_end_0, end_mask = x1_5_end_mask_0, x = var_2114_cast_fp16)[name = string("x1_5")]; tensor x2_5_begin_0 = const()[name = string("x2_5_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_5_end_0 = const()[name = string("x2_5_end_0"), val = tensor([1, 16, 1, 128])]; tensor x2_5_end_mask_0 = const()[name = string("x2_5_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_5 = slice_by_index(begin = x2_5_begin_0, end = x2_5_end_0, end_mask = x2_5_end_mask_0, x = var_2114_cast_fp16)[name = string("x2_5")]; fp16 const_23_promoted = const()[name = string("const_23_promoted"), val = fp16(-0x1p+0)]; tensor var_2156 = mul(x = x2_5, y = const_23_promoted)[name = string("op_2156")]; int32 var_2158 = const()[name = string("op_2158"), val = int32(-1)]; bool var_2159_interleave_0 = const()[name = string("op_2159_interleave_0"), val = bool(false)]; tensor var_2159 = concat(axis = var_2158, interleave = var_2159_interleave_0, values = (var_2156, x1_5))[name = string("op_2159")]; tensor var_2160 = mul(x = var_2159, y = sin_1_cast_fp16)[name = string("op_2160")]; tensor query_states_5 = add(x = var_2135, y = var_2160)[name = string("query_states_5")]; tensor var_2163 = mul(x = var_2132_cast_fp16, y = cos_1_cast_fp16)[name = string("op_2163")]; tensor x1_7_begin_0 = const()[name = string("x1_7_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_7_end_0 = const()[name = string("x1_7_end_0"), val = tensor([1, 8, 1, 64])]; tensor x1_7_end_mask_0 = const()[name = string("x1_7_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_7 = slice_by_index(begin = x1_7_begin_0, end = x1_7_end_0, end_mask = x1_7_end_mask_0, x = var_2132_cast_fp16)[name = string("x1_7")]; tensor x2_7_begin_0 = const()[name = string("x2_7_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_7_end_0 = const()[name = string("x2_7_end_0"), val = tensor([1, 8, 1, 128])]; tensor x2_7_end_mask_0 = const()[name = string("x2_7_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_7 = slice_by_index(begin = x2_7_begin_0, end = x2_7_end_0, end_mask = x2_7_end_mask_0, x = var_2132_cast_fp16)[name = string("x2_7")]; fp16 const_26_promoted = const()[name = string("const_26_promoted"), val = fp16(-0x1p+0)]; tensor var_2184 = mul(x = x2_7, y = const_26_promoted)[name = string("op_2184")]; int32 var_2186 = const()[name = string("op_2186"), val = int32(-1)]; bool var_2187_interleave_0 = const()[name = string("op_2187_interleave_0"), val = bool(false)]; tensor var_2187 = concat(axis = var_2186, interleave = var_2187_interleave_0, values = (var_2184, x1_7))[name = string("op_2187")]; tensor var_2188 = mul(x = var_2187, y = sin_1_cast_fp16)[name = string("op_2188")]; tensor key_states_5 = add(x = var_2163, y = var_2188)[name = string("key_states_5")]; tensor expand_dims_12 = const()[name = string("expand_dims_12"), val = tensor([1])]; tensor expand_dims_13 = const()[name = string("expand_dims_13"), val = tensor([0])]; tensor expand_dims_15 = const()[name = string("expand_dims_15"), val = tensor([0])]; tensor expand_dims_16 = const()[name = string("expand_dims_16"), val = tensor([2])]; int32 concat_10_axis_0 = const()[name = string("concat_10_axis_0"), val = int32(0)]; bool concat_10_interleave_0 = const()[name = string("concat_10_interleave_0"), val = bool(false)]; tensor concat_10 = concat(axis = concat_10_axis_0, interleave = concat_10_interleave_0, values = (expand_dims_12, expand_dims_13, current_pos, expand_dims_15))[name = string("concat_10")]; tensor concat_11_values1_0 = const()[name = string("concat_11_values1_0"), val = tensor([0])]; tensor concat_11_values3_0 = const()[name = string("concat_11_values3_0"), val = tensor([0])]; int32 concat_11_axis_0 = const()[name = string("concat_11_axis_0"), val = int32(0)]; bool concat_11_interleave_0 = const()[name = string("concat_11_interleave_0"), val = bool(false)]; tensor concat_11 = concat(axis = concat_11_axis_0, interleave = concat_11_interleave_0, values = (expand_dims_16, concat_11_values1_0, var_1725, concat_11_values3_0))[name = string("concat_11")]; tensor model_model_kv_cache_0_internal_tensor_assign_3_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_3_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_3_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_3_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_3_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_3_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_3_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_3_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_3_cast_fp16 = slice_update(begin = concat_10, begin_mask = model_model_kv_cache_0_internal_tensor_assign_3_begin_mask_0, end = concat_11, end_mask = model_model_kv_cache_0_internal_tensor_assign_3_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_3_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_3_stride_0, update = key_states_5, x = coreml_update_state_57)[name = string("model_model_kv_cache_0_internal_tensor_assign_3_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_3_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_2_write_state")]; tensor coreml_update_state_58 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_2")]; tensor expand_dims_18 = const()[name = string("expand_dims_18"), val = tensor([29])]; tensor expand_dims_19 = const()[name = string("expand_dims_19"), val = tensor([0])]; tensor expand_dims_21 = const()[name = string("expand_dims_21"), val = tensor([0])]; tensor expand_dims_22 = const()[name = string("expand_dims_22"), val = tensor([30])]; int32 concat_14_axis_0 = const()[name = string("concat_14_axis_0"), val = int32(0)]; bool concat_14_interleave_0 = const()[name = string("concat_14_interleave_0"), val = bool(false)]; tensor concat_14 = concat(axis = concat_14_axis_0, interleave = concat_14_interleave_0, values = (expand_dims_18, expand_dims_19, current_pos, expand_dims_21))[name = string("concat_14")]; tensor concat_15_values1_0 = const()[name = string("concat_15_values1_0"), val = tensor([0])]; tensor concat_15_values3_0 = const()[name = string("concat_15_values3_0"), val = tensor([0])]; int32 concat_15_axis_0 = const()[name = string("concat_15_axis_0"), val = int32(0)]; bool concat_15_interleave_0 = const()[name = string("concat_15_interleave_0"), val = bool(false)]; tensor concat_15 = concat(axis = concat_15_axis_0, interleave = concat_15_interleave_0, values = (expand_dims_22, concat_15_values1_0, var_1725, concat_15_values3_0))[name = string("concat_15")]; tensor model_model_kv_cache_0_internal_tensor_assign_4_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_4_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_4_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_4_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_4_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_4_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_4_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_4_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_4_cast_fp16 = slice_update(begin = concat_14, begin_mask = model_model_kv_cache_0_internal_tensor_assign_4_begin_mask_0, end = concat_15, end_mask = model_model_kv_cache_0_internal_tensor_assign_4_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_4_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_4_stride_0, update = var_2093, x = coreml_update_state_58)[name = string("model_model_kv_cache_0_internal_tensor_assign_4_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_4_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_3_write_state")]; tensor coreml_update_state_59 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_3")]; tensor var_2243_begin_0 = const()[name = string("op_2243_begin_0"), val = tensor([1, 0, 0, 0])]; tensor var_2243_end_0 = const()[name = string("op_2243_end_0"), val = tensor([2, 8, 1024, 128])]; tensor var_2243_end_mask_0 = const()[name = string("op_2243_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_2243_cast_fp16 = slice_by_index(begin = var_2243_begin_0, end = var_2243_end_0, end_mask = var_2243_end_mask_0, x = coreml_update_state_59)[name = string("op_2243_cast_fp16")]; tensor K_layer_cache_3_axes_0 = const()[name = string("K_layer_cache_3_axes_0"), val = tensor([0])]; tensor K_layer_cache_3_cast_fp16 = squeeze(axes = K_layer_cache_3_axes_0, x = var_2243_cast_fp16)[name = string("K_layer_cache_3_cast_fp16")]; tensor var_2250_begin_0 = const()[name = string("op_2250_begin_0"), val = tensor([29, 0, 0, 0])]; tensor var_2250_end_0 = const()[name = string("op_2250_end_0"), val = tensor([30, 8, 1024, 128])]; tensor var_2250_end_mask_0 = const()[name = string("op_2250_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_2250_cast_fp16 = slice_by_index(begin = var_2250_begin_0, end = var_2250_end_0, end_mask = var_2250_end_mask_0, x = coreml_update_state_59)[name = string("op_2250_cast_fp16")]; tensor V_layer_cache_3_axes_0 = const()[name = string("V_layer_cache_3_axes_0"), val = tensor([0])]; tensor V_layer_cache_3_cast_fp16 = squeeze(axes = V_layer_cache_3_axes_0, x = var_2250_cast_fp16)[name = string("V_layer_cache_3_cast_fp16")]; tensor x_27_axes_0 = const()[name = string("x_27_axes_0"), val = tensor([1])]; tensor x_27_cast_fp16 = expand_dims(axes = x_27_axes_0, x = K_layer_cache_3_cast_fp16)[name = string("x_27_cast_fp16")]; tensor var_2287 = const()[name = string("op_2287"), val = tensor([1, 2, 1, 1])]; tensor x_29_cast_fp16 = tile(reps = var_2287, x = x_27_cast_fp16)[name = string("x_29_cast_fp16")]; tensor var_2299 = const()[name = string("op_2299"), val = tensor([1, -1, 1024, 128])]; tensor key_states_7_cast_fp16 = reshape(shape = var_2299, x = x_29_cast_fp16)[name = string("key_states_7_cast_fp16")]; tensor x_33_axes_0 = const()[name = string("x_33_axes_0"), val = tensor([1])]; tensor x_33_cast_fp16 = expand_dims(axes = x_33_axes_0, x = V_layer_cache_3_cast_fp16)[name = string("x_33_cast_fp16")]; tensor var_2307 = const()[name = string("op_2307"), val = tensor([1, 2, 1, 1])]; tensor x_35_cast_fp16 = tile(reps = var_2307, x = x_33_cast_fp16)[name = string("x_35_cast_fp16")]; tensor var_2319 = const()[name = string("op_2319"), val = tensor([1, -1, 1024, 128])]; tensor value_states_9_cast_fp16 = reshape(shape = var_2319, x = x_35_cast_fp16)[name = string("value_states_9_cast_fp16")]; bool var_2334_transpose_x_1 = const()[name = string("op_2334_transpose_x_1"), val = bool(false)]; bool var_2334_transpose_y_1 = const()[name = string("op_2334_transpose_y_1"), val = bool(true)]; tensor var_2334 = matmul(transpose_x = var_2334_transpose_x_1, transpose_y = var_2334_transpose_y_1, x = query_states_5, y = key_states_7_cast_fp16)[name = string("op_2334")]; fp16 var_2335_to_fp16 = const()[name = string("op_2335_to_fp16"), val = fp16(0x1.6ap-4)]; tensor attn_weights_7_cast_fp16 = mul(x = var_2334, y = var_2335_to_fp16)[name = string("attn_weights_7_cast_fp16")]; tensor attn_weights_9_cast_fp16 = add(x = attn_weights_7_cast_fp16, y = causal_mask)[name = string("attn_weights_9_cast_fp16")]; int32 var_2370 = const()[name = string("op_2370"), val = int32(-1)]; tensor attn_weights_11_cast_fp16 = softmax(axis = var_2370, x = attn_weights_9_cast_fp16)[name = string("attn_weights_11_cast_fp16")]; bool attn_output_11_transpose_x_0 = const()[name = string("attn_output_11_transpose_x_0"), val = bool(false)]; bool attn_output_11_transpose_y_0 = const()[name = string("attn_output_11_transpose_y_0"), val = bool(false)]; tensor attn_output_11_cast_fp16 = matmul(transpose_x = attn_output_11_transpose_x_0, transpose_y = attn_output_11_transpose_y_0, x = attn_weights_11_cast_fp16, y = value_states_9_cast_fp16)[name = string("attn_output_11_cast_fp16")]; tensor var_2381_perm_0 = const()[name = string("op_2381_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_2385 = const()[name = string("op_2385"), val = tensor([1, 1, 2048])]; tensor var_2381_cast_fp16 = transpose(perm = var_2381_perm_0, x = attn_output_11_cast_fp16)[name = string("transpose_160")]; tensor attn_output_15_cast_fp16 = reshape(shape = var_2385, x = var_2381_cast_fp16)[name = string("attn_output_15_cast_fp16")]; tensor var_2390 = const()[name = string("op_2390"), val = tensor([0, 2, 1])]; string var_2406_pad_type_0 = const()[name = string("op_2406_pad_type_0"), val = string("valid")]; int32 var_2406_groups_0 = const()[name = string("op_2406_groups_0"), val = int32(1)]; tensor var_2406_strides_0 = const()[name = string("op_2406_strides_0"), val = tensor([1])]; tensor var_2406_pad_0 = const()[name = string("op_2406_pad_0"), val = tensor([0, 0])]; tensor var_2406_dilations_0 = const()[name = string("op_2406_dilations_0"), val = tensor([1])]; tensor squeeze_1_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1013910272))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1017056064))))[name = string("squeeze_1_cast_fp16_to_fp32_to_fp16_palettized")]; tensor var_2391_cast_fp16 = transpose(perm = var_2390, x = attn_output_15_cast_fp16)[name = string("transpose_159")]; tensor var_2406_cast_fp16 = conv(dilations = var_2406_dilations_0, groups = var_2406_groups_0, pad = var_2406_pad_0, pad_type = var_2406_pad_type_0, strides = var_2406_strides_0, weight = squeeze_1_cast_fp16_to_fp32_to_fp16_palettized, x = var_2391_cast_fp16)[name = string("op_2406_cast_fp16")]; tensor var_2410 = const()[name = string("op_2410"), val = tensor([0, 2, 1])]; tensor attn_output_19_cast_fp16 = transpose(perm = var_2410, x = var_2406_cast_fp16)[name = string("transpose_158")]; tensor hidden_states_11_cast_fp16 = add(x = hidden_states_7_cast_fp16, y = attn_output_19_cast_fp16)[name = string("hidden_states_11_cast_fp16")]; tensor mean_15_axes_0 = const()[name = string("mean_15_axes_0"), val = tensor([-1])]; bool mean_15_keep_dims_0 = const()[name = string("mean_15_keep_dims_0"), val = bool(true)]; tensor mean_15_cast_fp16 = reduce_mean(axes = mean_15_axes_0, keep_dims = mean_15_keep_dims_0, x = hidden_states_11_cast_fp16)[name = string("mean_15_cast_fp16")]; tensor input_29_cast_fp16 = sub(x = hidden_states_11_cast_fp16, y = mean_15_cast_fp16)[name = string("input_29_cast_fp16")]; tensor var_2429_axes_0 = const()[name = string("op_2429_axes_0"), val = tensor([-1])]; tensor model_model_layers_1_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_1_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1017088896)))]; fp16 var_2417_to_fp16 = const()[name = string("op_2417_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_2429_cast_fp16 = layer_norm(axes = var_2429_axes_0, epsilon = var_2417_to_fp16, gamma = model_model_layers_1_post_attention_layernorm_weight_to_fp16, x = input_29_cast_fp16)[name = string("op_2429_cast_fp16")]; tensor var_2443 = const()[name = string("op_2443"), val = tensor([0, 2, 1])]; tensor input_31_axes_0 = const()[name = string("input_31_axes_0"), val = tensor([2])]; tensor var_2444 = transpose(perm = var_2443, x = var_2429_cast_fp16)[name = string("transpose_157")]; tensor input_31 = expand_dims(axes = input_31_axes_0, x = var_2444)[name = string("input_31")]; string input_33_pad_type_0 = const()[name = string("input_33_pad_type_0"), val = string("valid")]; tensor input_33_strides_0 = const()[name = string("input_33_strides_0"), val = tensor([1, 1])]; tensor input_33_pad_0 = const()[name = string("input_33_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_33_dilations_0 = const()[name = string("input_33_dilations_0"), val = tensor([1, 1])]; int32 input_33_groups_0 = const()[name = string("input_33_groups_0"), val = int32(1)]; tensor input_33 = conv(dilations = input_33_dilations_0, groups = input_33_groups_0, pad = input_33_pad_0, pad_type = input_33_pad_type_0, strides = input_33_strides_0, weight = model_model_layers_1_mlp_gate_proj_weight_palettized, x = input_31)[name = string("input_33")]; string b_3_pad_type_0 = const()[name = string("b_3_pad_type_0"), val = string("valid")]; tensor b_3_strides_0 = const()[name = string("b_3_strides_0"), val = tensor([1, 1])]; tensor b_3_pad_0 = const()[name = string("b_3_pad_0"), val = tensor([0, 0, 0, 0])]; tensor b_3_dilations_0 = const()[name = string("b_3_dilations_0"), val = tensor([1, 1])]; int32 b_3_groups_0 = const()[name = string("b_3_groups_0"), val = int32(1)]; tensor b_3 = conv(dilations = b_3_dilations_0, groups = b_3_groups_0, pad = b_3_pad_0, pad_type = b_3_pad_type_0, strides = b_3_strides_0, weight = model_model_layers_1_mlp_up_proj_weight_palettized, x = input_31)[name = string("b_3")]; tensor c_3 = silu(x = input_33)[name = string("c_3")]; tensor input_35 = mul(x = c_3, y = b_3)[name = string("input_35")]; string e_3_pad_type_0 = const()[name = string("e_3_pad_type_0"), val = string("valid")]; tensor e_3_strides_0 = const()[name = string("e_3_strides_0"), val = tensor([1, 1])]; tensor e_3_pad_0 = const()[name = string("e_3_pad_0"), val = tensor([0, 0, 0, 0])]; tensor e_3_dilations_0 = const()[name = string("e_3_dilations_0"), val = tensor([1, 1])]; int32 e_3_groups_0 = const()[name = string("e_3_groups_0"), val = int32(1)]; tensor e_3 = conv(dilations = e_3_dilations_0, groups = e_3_groups_0, pad = e_3_pad_0, pad_type = e_3_pad_type_0, strides = e_3_strides_0, weight = model_model_layers_1_mlp_down_proj_weight_palettized, x = input_35)[name = string("e_3")]; tensor var_2466_axes_0 = const()[name = string("op_2466_axes_0"), val = tensor([2])]; tensor var_2466 = squeeze(axes = var_2466_axes_0, x = e_3)[name = string("op_2466")]; tensor var_2467 = const()[name = string("op_2467"), val = tensor([0, 2, 1])]; tensor var_2468 = transpose(perm = var_2467, x = var_2466)[name = string("transpose_156")]; tensor hidden_states_13_cast_fp16 = add(x = hidden_states_11_cast_fp16, y = var_2468)[name = string("hidden_states_13_cast_fp16")]; tensor mean_17_axes_0 = const()[name = string("mean_17_axes_0"), val = tensor([-1])]; bool mean_17_keep_dims_0 = const()[name = string("mean_17_keep_dims_0"), val = bool(true)]; tensor mean_17_cast_fp16 = reduce_mean(axes = mean_17_axes_0, keep_dims = mean_17_keep_dims_0, x = hidden_states_13_cast_fp16)[name = string("mean_17_cast_fp16")]; tensor input_37_cast_fp16 = sub(x = hidden_states_13_cast_fp16, y = mean_17_cast_fp16)[name = string("input_37_cast_fp16")]; tensor var_2486_axes_0 = const()[name = string("op_2486_axes_0"), val = tensor([-1])]; tensor model_model_layers_2_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_2_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1017093056)))]; fp16 var_2474_to_fp16 = const()[name = string("op_2474_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_2486_cast_fp16 = layer_norm(axes = var_2486_axes_0, epsilon = var_2474_to_fp16, gamma = model_model_layers_2_input_layernorm_weight_to_fp16, x = input_37_cast_fp16)[name = string("op_2486_cast_fp16")]; tensor var_2492 = const()[name = string("op_2492"), val = tensor([0, 2, 1])]; tensor var_2495_axes_0 = const()[name = string("op_2495_axes_0"), val = tensor([2])]; tensor var_2493 = transpose(perm = var_2492, x = var_2486_cast_fp16)[name = string("transpose_155")]; tensor var_2495 = expand_dims(axes = var_2495_axes_0, x = var_2493)[name = string("op_2495")]; string var_2511_pad_type_0 = const()[name = string("op_2511_pad_type_0"), val = string("valid")]; tensor var_2511_strides_0 = const()[name = string("op_2511_strides_0"), val = tensor([1, 1])]; tensor var_2511_pad_0 = const()[name = string("op_2511_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_2511_dilations_0 = const()[name = string("op_2511_dilations_0"), val = tensor([1, 1])]; int32 var_2511_groups_0 = const()[name = string("op_2511_groups_0"), val = int32(1)]; tensor var_2511 = conv(dilations = var_2511_dilations_0, groups = var_2511_groups_0, pad = var_2511_pad_0, pad_type = var_2511_pad_type_0, strides = var_2511_strides_0, weight = model_model_layers_2_self_attn_q_proj_weight_palettized, x = var_2495)[name = string("op_2511")]; tensor var_2516 = const()[name = string("op_2516"), val = tensor([1, 16, 1, 128])]; tensor var_2517 = reshape(shape = var_2516, x = var_2511)[name = string("op_2517")]; string var_2533_pad_type_0 = const()[name = string("op_2533_pad_type_0"), val = string("valid")]; tensor var_2533_strides_0 = const()[name = string("op_2533_strides_0"), val = tensor([1, 1])]; tensor var_2533_pad_0 = const()[name = string("op_2533_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_2533_dilations_0 = const()[name = string("op_2533_dilations_0"), val = tensor([1, 1])]; int32 var_2533_groups_0 = const()[name = string("op_2533_groups_0"), val = int32(1)]; tensor var_2533 = conv(dilations = var_2533_dilations_0, groups = var_2533_groups_0, pad = var_2533_pad_0, pad_type = var_2533_pad_type_0, strides = var_2533_strides_0, weight = model_model_layers_2_self_attn_k_proj_weight_palettized, x = var_2495)[name = string("op_2533")]; tensor var_2538 = const()[name = string("op_2538"), val = tensor([1, 8, 1, 128])]; tensor var_2539 = reshape(shape = var_2538, x = var_2533)[name = string("op_2539")]; string var_2555_pad_type_0 = const()[name = string("op_2555_pad_type_0"), val = string("valid")]; tensor var_2555_strides_0 = const()[name = string("op_2555_strides_0"), val = tensor([1, 1])]; tensor var_2555_pad_0 = const()[name = string("op_2555_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_2555_dilations_0 = const()[name = string("op_2555_dilations_0"), val = tensor([1, 1])]; int32 var_2555_groups_0 = const()[name = string("op_2555_groups_0"), val = int32(1)]; tensor var_2555 = conv(dilations = var_2555_dilations_0, groups = var_2555_groups_0, pad = var_2555_pad_0, pad_type = var_2555_pad_type_0, strides = var_2555_strides_0, weight = model_model_layers_2_self_attn_v_proj_weight_palettized, x = var_2495)[name = string("op_2555")]; tensor var_2560 = const()[name = string("op_2560"), val = tensor([1, 8, 1, 128])]; tensor var_2561 = reshape(shape = var_2560, x = var_2555)[name = string("op_2561")]; tensor mean_19_axes_0 = const()[name = string("mean_19_axes_0"), val = tensor([-1])]; bool mean_19_keep_dims_0 = const()[name = string("mean_19_keep_dims_0"), val = bool(true)]; tensor mean_19 = reduce_mean(axes = mean_19_axes_0, keep_dims = mean_19_keep_dims_0, x = var_2517)[name = string("mean_19")]; tensor input_41 = sub(x = var_2517, y = mean_19)[name = string("input_41")]; tensor var_2582_axes_0 = const()[name = string("op_2582_axes_0"), val = tensor([-1])]; tensor model_model_layers_2_self_attn_q_norm_weight_to_fp16 = const()[name = string("model_model_layers_2_self_attn_q_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1017097216)))]; fp16 var_2570_to_fp16 = const()[name = string("op_2570_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_2582_cast_fp16 = layer_norm(axes = var_2582_axes_0, epsilon = var_2570_to_fp16, gamma = model_model_layers_2_self_attn_q_norm_weight_to_fp16, x = input_41)[name = string("op_2582_cast_fp16")]; tensor mean_21_axes_0 = const()[name = string("mean_21_axes_0"), val = tensor([-1])]; bool mean_21_keep_dims_0 = const()[name = string("mean_21_keep_dims_0"), val = bool(true)]; tensor mean_21 = reduce_mean(axes = mean_21_axes_0, keep_dims = mean_21_keep_dims_0, x = var_2539)[name = string("mean_21")]; tensor input_43 = sub(x = var_2539, y = mean_21)[name = string("input_43")]; tensor var_2600_axes_0 = const()[name = string("op_2600_axes_0"), val = tensor([-1])]; tensor model_model_layers_2_self_attn_k_norm_weight_to_fp16 = const()[name = string("model_model_layers_2_self_attn_k_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1017097536)))]; fp16 var_2588_to_fp16 = const()[name = string("op_2588_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_2600_cast_fp16 = layer_norm(axes = var_2600_axes_0, epsilon = var_2588_to_fp16, gamma = model_model_layers_2_self_attn_k_norm_weight_to_fp16, x = input_43)[name = string("op_2600_cast_fp16")]; tensor var_2603 = mul(x = var_2582_cast_fp16, y = cos_1_cast_fp16)[name = string("op_2603")]; tensor x1_9_begin_0 = const()[name = string("x1_9_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_9_end_0 = const()[name = string("x1_9_end_0"), val = tensor([1, 16, 1, 64])]; tensor x1_9_end_mask_0 = const()[name = string("x1_9_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_9 = slice_by_index(begin = x1_9_begin_0, end = x1_9_end_0, end_mask = x1_9_end_mask_0, x = var_2582_cast_fp16)[name = string("x1_9")]; tensor x2_9_begin_0 = const()[name = string("x2_9_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_9_end_0 = const()[name = string("x2_9_end_0"), val = tensor([1, 16, 1, 128])]; tensor x2_9_end_mask_0 = const()[name = string("x2_9_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_9 = slice_by_index(begin = x2_9_begin_0, end = x2_9_end_0, end_mask = x2_9_end_mask_0, x = var_2582_cast_fp16)[name = string("x2_9")]; fp16 const_41_promoted = const()[name = string("const_41_promoted"), val = fp16(-0x1p+0)]; tensor var_2624 = mul(x = x2_9, y = const_41_promoted)[name = string("op_2624")]; int32 var_2626 = const()[name = string("op_2626"), val = int32(-1)]; bool var_2627_interleave_0 = const()[name = string("op_2627_interleave_0"), val = bool(false)]; tensor var_2627 = concat(axis = var_2626, interleave = var_2627_interleave_0, values = (var_2624, x1_9))[name = string("op_2627")]; tensor var_2628 = mul(x = var_2627, y = sin_1_cast_fp16)[name = string("op_2628")]; tensor query_states_9 = add(x = var_2603, y = var_2628)[name = string("query_states_9")]; tensor var_2631 = mul(x = var_2600_cast_fp16, y = cos_1_cast_fp16)[name = string("op_2631")]; tensor x1_11_begin_0 = const()[name = string("x1_11_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_11_end_0 = const()[name = string("x1_11_end_0"), val = tensor([1, 8, 1, 64])]; tensor x1_11_end_mask_0 = const()[name = string("x1_11_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_11 = slice_by_index(begin = x1_11_begin_0, end = x1_11_end_0, end_mask = x1_11_end_mask_0, x = var_2600_cast_fp16)[name = string("x1_11")]; tensor x2_11_begin_0 = const()[name = string("x2_11_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_11_end_0 = const()[name = string("x2_11_end_0"), val = tensor([1, 8, 1, 128])]; tensor x2_11_end_mask_0 = const()[name = string("x2_11_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_11 = slice_by_index(begin = x2_11_begin_0, end = x2_11_end_0, end_mask = x2_11_end_mask_0, x = var_2600_cast_fp16)[name = string("x2_11")]; fp16 const_44_promoted = const()[name = string("const_44_promoted"), val = fp16(-0x1p+0)]; tensor var_2652 = mul(x = x2_11, y = const_44_promoted)[name = string("op_2652")]; int32 var_2654 = const()[name = string("op_2654"), val = int32(-1)]; bool var_2655_interleave_0 = const()[name = string("op_2655_interleave_0"), val = bool(false)]; tensor var_2655 = concat(axis = var_2654, interleave = var_2655_interleave_0, values = (var_2652, x1_11))[name = string("op_2655")]; tensor var_2656 = mul(x = var_2655, y = sin_1_cast_fp16)[name = string("op_2656")]; tensor key_states_9 = add(x = var_2631, y = var_2656)[name = string("key_states_9")]; tensor expand_dims_24 = const()[name = string("expand_dims_24"), val = tensor([2])]; tensor expand_dims_25 = const()[name = string("expand_dims_25"), val = tensor([0])]; tensor expand_dims_27 = const()[name = string("expand_dims_27"), val = tensor([0])]; tensor expand_dims_28 = const()[name = string("expand_dims_28"), val = tensor([3])]; int32 concat_18_axis_0 = const()[name = string("concat_18_axis_0"), val = int32(0)]; bool concat_18_interleave_0 = const()[name = string("concat_18_interleave_0"), val = bool(false)]; tensor concat_18 = concat(axis = concat_18_axis_0, interleave = concat_18_interleave_0, values = (expand_dims_24, expand_dims_25, current_pos, expand_dims_27))[name = string("concat_18")]; tensor concat_19_values1_0 = const()[name = string("concat_19_values1_0"), val = tensor([0])]; tensor concat_19_values3_0 = const()[name = string("concat_19_values3_0"), val = tensor([0])]; int32 concat_19_axis_0 = const()[name = string("concat_19_axis_0"), val = int32(0)]; bool concat_19_interleave_0 = const()[name = string("concat_19_interleave_0"), val = bool(false)]; tensor concat_19 = concat(axis = concat_19_axis_0, interleave = concat_19_interleave_0, values = (expand_dims_28, concat_19_values1_0, var_1725, concat_19_values3_0))[name = string("concat_19")]; tensor model_model_kv_cache_0_internal_tensor_assign_5_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_5_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_5_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_5_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_5_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_5_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_5_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_5_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_5_cast_fp16 = slice_update(begin = concat_18, begin_mask = model_model_kv_cache_0_internal_tensor_assign_5_begin_mask_0, end = concat_19, end_mask = model_model_kv_cache_0_internal_tensor_assign_5_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_5_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_5_stride_0, update = key_states_9, x = coreml_update_state_59)[name = string("model_model_kv_cache_0_internal_tensor_assign_5_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_5_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_4_write_state")]; tensor coreml_update_state_60 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_4")]; tensor expand_dims_30 = const()[name = string("expand_dims_30"), val = tensor([30])]; tensor expand_dims_31 = const()[name = string("expand_dims_31"), val = tensor([0])]; tensor expand_dims_33 = const()[name = string("expand_dims_33"), val = tensor([0])]; tensor expand_dims_34 = const()[name = string("expand_dims_34"), val = tensor([31])]; int32 concat_22_axis_0 = const()[name = string("concat_22_axis_0"), val = int32(0)]; bool concat_22_interleave_0 = const()[name = string("concat_22_interleave_0"), val = bool(false)]; tensor concat_22 = concat(axis = concat_22_axis_0, interleave = concat_22_interleave_0, values = (expand_dims_30, expand_dims_31, current_pos, expand_dims_33))[name = string("concat_22")]; tensor concat_23_values1_0 = const()[name = string("concat_23_values1_0"), val = tensor([0])]; tensor concat_23_values3_0 = const()[name = string("concat_23_values3_0"), val = tensor([0])]; int32 concat_23_axis_0 = const()[name = string("concat_23_axis_0"), val = int32(0)]; bool concat_23_interleave_0 = const()[name = string("concat_23_interleave_0"), val = bool(false)]; tensor concat_23 = concat(axis = concat_23_axis_0, interleave = concat_23_interleave_0, values = (expand_dims_34, concat_23_values1_0, var_1725, concat_23_values3_0))[name = string("concat_23")]; tensor model_model_kv_cache_0_internal_tensor_assign_6_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_6_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_6_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_6_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_6_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_6_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_6_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_6_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_6_cast_fp16 = slice_update(begin = concat_22, begin_mask = model_model_kv_cache_0_internal_tensor_assign_6_begin_mask_0, end = concat_23, end_mask = model_model_kv_cache_0_internal_tensor_assign_6_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_6_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_6_stride_0, update = var_2561, x = coreml_update_state_60)[name = string("model_model_kv_cache_0_internal_tensor_assign_6_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_6_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_5_write_state")]; tensor coreml_update_state_61 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_5")]; tensor var_2711_begin_0 = const()[name = string("op_2711_begin_0"), val = tensor([2, 0, 0, 0])]; tensor var_2711_end_0 = const()[name = string("op_2711_end_0"), val = tensor([3, 8, 1024, 128])]; tensor var_2711_end_mask_0 = const()[name = string("op_2711_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_2711_cast_fp16 = slice_by_index(begin = var_2711_begin_0, end = var_2711_end_0, end_mask = var_2711_end_mask_0, x = coreml_update_state_61)[name = string("op_2711_cast_fp16")]; tensor K_layer_cache_5_axes_0 = const()[name = string("K_layer_cache_5_axes_0"), val = tensor([0])]; tensor K_layer_cache_5_cast_fp16 = squeeze(axes = K_layer_cache_5_axes_0, x = var_2711_cast_fp16)[name = string("K_layer_cache_5_cast_fp16")]; tensor var_2718_begin_0 = const()[name = string("op_2718_begin_0"), val = tensor([30, 0, 0, 0])]; tensor var_2718_end_0 = const()[name = string("op_2718_end_0"), val = tensor([31, 8, 1024, 128])]; tensor var_2718_end_mask_0 = const()[name = string("op_2718_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_2718_cast_fp16 = slice_by_index(begin = var_2718_begin_0, end = var_2718_end_0, end_mask = var_2718_end_mask_0, x = coreml_update_state_61)[name = string("op_2718_cast_fp16")]; tensor V_layer_cache_5_axes_0 = const()[name = string("V_layer_cache_5_axes_0"), val = tensor([0])]; tensor V_layer_cache_5_cast_fp16 = squeeze(axes = V_layer_cache_5_axes_0, x = var_2718_cast_fp16)[name = string("V_layer_cache_5_cast_fp16")]; tensor x_47_axes_0 = const()[name = string("x_47_axes_0"), val = tensor([1])]; tensor x_47_cast_fp16 = expand_dims(axes = x_47_axes_0, x = K_layer_cache_5_cast_fp16)[name = string("x_47_cast_fp16")]; tensor var_2755 = const()[name = string("op_2755"), val = tensor([1, 2, 1, 1])]; tensor x_49_cast_fp16 = tile(reps = var_2755, x = x_47_cast_fp16)[name = string("x_49_cast_fp16")]; tensor var_2767 = const()[name = string("op_2767"), val = tensor([1, -1, 1024, 128])]; tensor key_states_11_cast_fp16 = reshape(shape = var_2767, x = x_49_cast_fp16)[name = string("key_states_11_cast_fp16")]; tensor x_53_axes_0 = const()[name = string("x_53_axes_0"), val = tensor([1])]; tensor x_53_cast_fp16 = expand_dims(axes = x_53_axes_0, x = V_layer_cache_5_cast_fp16)[name = string("x_53_cast_fp16")]; tensor var_2775 = const()[name = string("op_2775"), val = tensor([1, 2, 1, 1])]; tensor x_55_cast_fp16 = tile(reps = var_2775, x = x_53_cast_fp16)[name = string("x_55_cast_fp16")]; tensor var_2787 = const()[name = string("op_2787"), val = tensor([1, -1, 1024, 128])]; tensor value_states_15_cast_fp16 = reshape(shape = var_2787, x = x_55_cast_fp16)[name = string("value_states_15_cast_fp16")]; bool var_2802_transpose_x_1 = const()[name = string("op_2802_transpose_x_1"), val = bool(false)]; bool var_2802_transpose_y_1 = const()[name = string("op_2802_transpose_y_1"), val = bool(true)]; tensor var_2802 = matmul(transpose_x = var_2802_transpose_x_1, transpose_y = var_2802_transpose_y_1, x = query_states_9, y = key_states_11_cast_fp16)[name = string("op_2802")]; fp16 var_2803_to_fp16 = const()[name = string("op_2803_to_fp16"), val = fp16(0x1.6ap-4)]; tensor attn_weights_13_cast_fp16 = mul(x = var_2802, y = var_2803_to_fp16)[name = string("attn_weights_13_cast_fp16")]; tensor attn_weights_15_cast_fp16 = add(x = attn_weights_13_cast_fp16, y = causal_mask)[name = string("attn_weights_15_cast_fp16")]; int32 var_2838 = const()[name = string("op_2838"), val = int32(-1)]; tensor attn_weights_17_cast_fp16 = softmax(axis = var_2838, x = attn_weights_15_cast_fp16)[name = string("attn_weights_17_cast_fp16")]; bool attn_output_21_transpose_x_0 = const()[name = string("attn_output_21_transpose_x_0"), val = bool(false)]; bool attn_output_21_transpose_y_0 = const()[name = string("attn_output_21_transpose_y_0"), val = bool(false)]; tensor attn_output_21_cast_fp16 = matmul(transpose_x = attn_output_21_transpose_x_0, transpose_y = attn_output_21_transpose_y_0, x = attn_weights_17_cast_fp16, y = value_states_15_cast_fp16)[name = string("attn_output_21_cast_fp16")]; tensor var_2849_perm_0 = const()[name = string("op_2849_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_2853 = const()[name = string("op_2853"), val = tensor([1, 1, 2048])]; tensor var_2849_cast_fp16 = transpose(perm = var_2849_perm_0, x = attn_output_21_cast_fp16)[name = string("transpose_154")]; tensor attn_output_25_cast_fp16 = reshape(shape = var_2853, x = var_2849_cast_fp16)[name = string("attn_output_25_cast_fp16")]; tensor var_2858 = const()[name = string("op_2858"), val = tensor([0, 2, 1])]; string var_2874_pad_type_0 = const()[name = string("op_2874_pad_type_0"), val = string("valid")]; int32 var_2874_groups_0 = const()[name = string("op_2874_groups_0"), val = int32(1)]; tensor var_2874_strides_0 = const()[name = string("op_2874_strides_0"), val = tensor([1])]; tensor var_2874_pad_0 = const()[name = string("op_2874_pad_0"), val = tensor([0, 0])]; tensor var_2874_dilations_0 = const()[name = string("op_2874_dilations_0"), val = tensor([1])]; tensor squeeze_2_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1017097856))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1020243648))))[name = string("squeeze_2_cast_fp16_to_fp32_to_fp16_palettized")]; tensor var_2859_cast_fp16 = transpose(perm = var_2858, x = attn_output_25_cast_fp16)[name = string("transpose_153")]; tensor var_2874_cast_fp16 = conv(dilations = var_2874_dilations_0, groups = var_2874_groups_0, pad = var_2874_pad_0, pad_type = var_2874_pad_type_0, strides = var_2874_strides_0, weight = squeeze_2_cast_fp16_to_fp32_to_fp16_palettized, x = var_2859_cast_fp16)[name = string("op_2874_cast_fp16")]; tensor var_2878 = const()[name = string("op_2878"), val = tensor([0, 2, 1])]; tensor attn_output_29_cast_fp16 = transpose(perm = var_2878, x = var_2874_cast_fp16)[name = string("transpose_152")]; tensor hidden_states_17_cast_fp16 = add(x = hidden_states_13_cast_fp16, y = attn_output_29_cast_fp16)[name = string("hidden_states_17_cast_fp16")]; tensor mean_23_axes_0 = const()[name = string("mean_23_axes_0"), val = tensor([-1])]; bool mean_23_keep_dims_0 = const()[name = string("mean_23_keep_dims_0"), val = bool(true)]; tensor mean_23_cast_fp16 = reduce_mean(axes = mean_23_axes_0, keep_dims = mean_23_keep_dims_0, x = hidden_states_17_cast_fp16)[name = string("mean_23_cast_fp16")]; tensor input_47_cast_fp16 = sub(x = hidden_states_17_cast_fp16, y = mean_23_cast_fp16)[name = string("input_47_cast_fp16")]; tensor var_2897_axes_0 = const()[name = string("op_2897_axes_0"), val = tensor([-1])]; tensor model_model_layers_2_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_2_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1020276480)))]; fp16 var_2885_to_fp16 = const()[name = string("op_2885_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_2897_cast_fp16 = layer_norm(axes = var_2897_axes_0, epsilon = var_2885_to_fp16, gamma = model_model_layers_2_post_attention_layernorm_weight_to_fp16, x = input_47_cast_fp16)[name = string("op_2897_cast_fp16")]; tensor var_2911 = const()[name = string("op_2911"), val = tensor([0, 2, 1])]; tensor input_49_axes_0 = const()[name = string("input_49_axes_0"), val = tensor([2])]; tensor var_2912 = transpose(perm = var_2911, x = var_2897_cast_fp16)[name = string("transpose_151")]; tensor input_49 = expand_dims(axes = input_49_axes_0, x = var_2912)[name = string("input_49")]; string input_51_pad_type_0 = const()[name = string("input_51_pad_type_0"), val = string("valid")]; tensor input_51_strides_0 = const()[name = string("input_51_strides_0"), val = tensor([1, 1])]; tensor input_51_pad_0 = const()[name = string("input_51_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_51_dilations_0 = const()[name = string("input_51_dilations_0"), val = tensor([1, 1])]; int32 input_51_groups_0 = const()[name = string("input_51_groups_0"), val = int32(1)]; tensor input_51 = conv(dilations = input_51_dilations_0, groups = input_51_groups_0, pad = input_51_pad_0, pad_type = input_51_pad_type_0, strides = input_51_strides_0, weight = model_model_layers_2_mlp_gate_proj_weight_palettized, x = input_49)[name = string("input_51")]; string b_5_pad_type_0 = const()[name = string("b_5_pad_type_0"), val = string("valid")]; tensor b_5_strides_0 = const()[name = string("b_5_strides_0"), val = tensor([1, 1])]; tensor b_5_pad_0 = const()[name = string("b_5_pad_0"), val = tensor([0, 0, 0, 0])]; tensor b_5_dilations_0 = const()[name = string("b_5_dilations_0"), val = tensor([1, 1])]; int32 b_5_groups_0 = const()[name = string("b_5_groups_0"), val = int32(1)]; tensor b_5 = conv(dilations = b_5_dilations_0, groups = b_5_groups_0, pad = b_5_pad_0, pad_type = b_5_pad_type_0, strides = b_5_strides_0, weight = model_model_layers_2_mlp_up_proj_weight_palettized, x = input_49)[name = string("b_5")]; tensor c_5 = silu(x = input_51)[name = string("c_5")]; tensor input_53 = mul(x = c_5, y = b_5)[name = string("input_53")]; string e_5_pad_type_0 = const()[name = string("e_5_pad_type_0"), val = string("valid")]; tensor e_5_strides_0 = const()[name = string("e_5_strides_0"), val = tensor([1, 1])]; tensor e_5_pad_0 = const()[name = string("e_5_pad_0"), val = tensor([0, 0, 0, 0])]; tensor e_5_dilations_0 = const()[name = string("e_5_dilations_0"), val = tensor([1, 1])]; int32 e_5_groups_0 = const()[name = string("e_5_groups_0"), val = int32(1)]; tensor e_5 = conv(dilations = e_5_dilations_0, groups = e_5_groups_0, pad = e_5_pad_0, pad_type = e_5_pad_type_0, strides = e_5_strides_0, weight = model_model_layers_2_mlp_down_proj_weight_palettized, x = input_53)[name = string("e_5")]; tensor var_2934_axes_0 = const()[name = string("op_2934_axes_0"), val = tensor([2])]; tensor var_2934 = squeeze(axes = var_2934_axes_0, x = e_5)[name = string("op_2934")]; tensor var_2935 = const()[name = string("op_2935"), val = tensor([0, 2, 1])]; tensor var_2936 = transpose(perm = var_2935, x = var_2934)[name = string("transpose_150")]; tensor hidden_states_19_cast_fp16 = add(x = hidden_states_17_cast_fp16, y = var_2936)[name = string("hidden_states_19_cast_fp16")]; tensor mean_25_axes_0 = const()[name = string("mean_25_axes_0"), val = tensor([-1])]; bool mean_25_keep_dims_0 = const()[name = string("mean_25_keep_dims_0"), val = bool(true)]; tensor mean_25_cast_fp16 = reduce_mean(axes = mean_25_axes_0, keep_dims = mean_25_keep_dims_0, x = hidden_states_19_cast_fp16)[name = string("mean_25_cast_fp16")]; tensor input_55_cast_fp16 = sub(x = hidden_states_19_cast_fp16, y = mean_25_cast_fp16)[name = string("input_55_cast_fp16")]; tensor var_2954_axes_0 = const()[name = string("op_2954_axes_0"), val = tensor([-1])]; tensor model_model_layers_3_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_3_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1020280640)))]; fp16 var_2942_to_fp16 = const()[name = string("op_2942_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_2954_cast_fp16 = layer_norm(axes = var_2954_axes_0, epsilon = var_2942_to_fp16, gamma = model_model_layers_3_input_layernorm_weight_to_fp16, x = input_55_cast_fp16)[name = string("op_2954_cast_fp16")]; tensor var_2960 = const()[name = string("op_2960"), val = tensor([0, 2, 1])]; tensor var_2963_axes_0 = const()[name = string("op_2963_axes_0"), val = tensor([2])]; tensor var_2961 = transpose(perm = var_2960, x = var_2954_cast_fp16)[name = string("transpose_149")]; tensor var_2963 = expand_dims(axes = var_2963_axes_0, x = var_2961)[name = string("op_2963")]; string var_2979_pad_type_0 = const()[name = string("op_2979_pad_type_0"), val = string("valid")]; tensor var_2979_strides_0 = const()[name = string("op_2979_strides_0"), val = tensor([1, 1])]; tensor var_2979_pad_0 = const()[name = string("op_2979_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_2979_dilations_0 = const()[name = string("op_2979_dilations_0"), val = tensor([1, 1])]; int32 var_2979_groups_0 = const()[name = string("op_2979_groups_0"), val = int32(1)]; tensor var_2979 = conv(dilations = var_2979_dilations_0, groups = var_2979_groups_0, pad = var_2979_pad_0, pad_type = var_2979_pad_type_0, strides = var_2979_strides_0, weight = model_model_layers_3_self_attn_q_proj_weight_palettized, x = var_2963)[name = string("op_2979")]; tensor var_2984 = const()[name = string("op_2984"), val = tensor([1, 16, 1, 128])]; tensor var_2985 = reshape(shape = var_2984, x = var_2979)[name = string("op_2985")]; string var_3001_pad_type_0 = const()[name = string("op_3001_pad_type_0"), val = string("valid")]; tensor var_3001_strides_0 = const()[name = string("op_3001_strides_0"), val = tensor([1, 1])]; tensor var_3001_pad_0 = const()[name = string("op_3001_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_3001_dilations_0 = const()[name = string("op_3001_dilations_0"), val = tensor([1, 1])]; int32 var_3001_groups_0 = const()[name = string("op_3001_groups_0"), val = int32(1)]; tensor var_3001 = conv(dilations = var_3001_dilations_0, groups = var_3001_groups_0, pad = var_3001_pad_0, pad_type = var_3001_pad_type_0, strides = var_3001_strides_0, weight = model_model_layers_3_self_attn_k_proj_weight_palettized, x = var_2963)[name = string("op_3001")]; tensor var_3006 = const()[name = string("op_3006"), val = tensor([1, 8, 1, 128])]; tensor var_3007 = reshape(shape = var_3006, x = var_3001)[name = string("op_3007")]; string var_3023_pad_type_0 = const()[name = string("op_3023_pad_type_0"), val = string("valid")]; tensor var_3023_strides_0 = const()[name = string("op_3023_strides_0"), val = tensor([1, 1])]; tensor var_3023_pad_0 = const()[name = string("op_3023_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_3023_dilations_0 = const()[name = string("op_3023_dilations_0"), val = tensor([1, 1])]; int32 var_3023_groups_0 = const()[name = string("op_3023_groups_0"), val = int32(1)]; tensor var_3023 = conv(dilations = var_3023_dilations_0, groups = var_3023_groups_0, pad = var_3023_pad_0, pad_type = var_3023_pad_type_0, strides = var_3023_strides_0, weight = model_model_layers_3_self_attn_v_proj_weight_palettized, x = var_2963)[name = string("op_3023")]; tensor var_3028 = const()[name = string("op_3028"), val = tensor([1, 8, 1, 128])]; tensor var_3029 = reshape(shape = var_3028, x = var_3023)[name = string("op_3029")]; tensor mean_27_axes_0 = const()[name = string("mean_27_axes_0"), val = tensor([-1])]; bool mean_27_keep_dims_0 = const()[name = string("mean_27_keep_dims_0"), val = bool(true)]; tensor mean_27 = reduce_mean(axes = mean_27_axes_0, keep_dims = mean_27_keep_dims_0, x = var_2985)[name = string("mean_27")]; tensor input_59 = sub(x = var_2985, y = mean_27)[name = string("input_59")]; tensor var_3050_axes_0 = const()[name = string("op_3050_axes_0"), val = tensor([-1])]; tensor model_model_layers_3_self_attn_q_norm_weight_to_fp16 = const()[name = string("model_model_layers_3_self_attn_q_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1020284800)))]; fp16 var_3038_to_fp16 = const()[name = string("op_3038_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_3050_cast_fp16 = layer_norm(axes = var_3050_axes_0, epsilon = var_3038_to_fp16, gamma = model_model_layers_3_self_attn_q_norm_weight_to_fp16, x = input_59)[name = string("op_3050_cast_fp16")]; tensor mean_29_axes_0 = const()[name = string("mean_29_axes_0"), val = tensor([-1])]; bool mean_29_keep_dims_0 = const()[name = string("mean_29_keep_dims_0"), val = bool(true)]; tensor mean_29 = reduce_mean(axes = mean_29_axes_0, keep_dims = mean_29_keep_dims_0, x = var_3007)[name = string("mean_29")]; tensor input_61 = sub(x = var_3007, y = mean_29)[name = string("input_61")]; tensor var_3068_axes_0 = const()[name = string("op_3068_axes_0"), val = tensor([-1])]; tensor model_model_layers_3_self_attn_k_norm_weight_to_fp16 = const()[name = string("model_model_layers_3_self_attn_k_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1020285120)))]; fp16 var_3056_to_fp16 = const()[name = string("op_3056_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_3068_cast_fp16 = layer_norm(axes = var_3068_axes_0, epsilon = var_3056_to_fp16, gamma = model_model_layers_3_self_attn_k_norm_weight_to_fp16, x = input_61)[name = string("op_3068_cast_fp16")]; tensor var_3071 = mul(x = var_3050_cast_fp16, y = cos_1_cast_fp16)[name = string("op_3071")]; tensor x1_13_begin_0 = const()[name = string("x1_13_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_13_end_0 = const()[name = string("x1_13_end_0"), val = tensor([1, 16, 1, 64])]; tensor x1_13_end_mask_0 = const()[name = string("x1_13_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_13 = slice_by_index(begin = x1_13_begin_0, end = x1_13_end_0, end_mask = x1_13_end_mask_0, x = var_3050_cast_fp16)[name = string("x1_13")]; tensor x2_13_begin_0 = const()[name = string("x2_13_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_13_end_0 = const()[name = string("x2_13_end_0"), val = tensor([1, 16, 1, 128])]; tensor x2_13_end_mask_0 = const()[name = string("x2_13_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_13 = slice_by_index(begin = x2_13_begin_0, end = x2_13_end_0, end_mask = x2_13_end_mask_0, x = var_3050_cast_fp16)[name = string("x2_13")]; fp16 const_59_promoted = const()[name = string("const_59_promoted"), val = fp16(-0x1p+0)]; tensor var_3092 = mul(x = x2_13, y = const_59_promoted)[name = string("op_3092")]; int32 var_3094 = const()[name = string("op_3094"), val = int32(-1)]; bool var_3095_interleave_0 = const()[name = string("op_3095_interleave_0"), val = bool(false)]; tensor var_3095 = concat(axis = var_3094, interleave = var_3095_interleave_0, values = (var_3092, x1_13))[name = string("op_3095")]; tensor var_3096 = mul(x = var_3095, y = sin_1_cast_fp16)[name = string("op_3096")]; tensor query_states_13 = add(x = var_3071, y = var_3096)[name = string("query_states_13")]; tensor var_3099 = mul(x = var_3068_cast_fp16, y = cos_1_cast_fp16)[name = string("op_3099")]; tensor x1_15_begin_0 = const()[name = string("x1_15_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_15_end_0 = const()[name = string("x1_15_end_0"), val = tensor([1, 8, 1, 64])]; tensor x1_15_end_mask_0 = const()[name = string("x1_15_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_15 = slice_by_index(begin = x1_15_begin_0, end = x1_15_end_0, end_mask = x1_15_end_mask_0, x = var_3068_cast_fp16)[name = string("x1_15")]; tensor x2_15_begin_0 = const()[name = string("x2_15_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_15_end_0 = const()[name = string("x2_15_end_0"), val = tensor([1, 8, 1, 128])]; tensor x2_15_end_mask_0 = const()[name = string("x2_15_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_15 = slice_by_index(begin = x2_15_begin_0, end = x2_15_end_0, end_mask = x2_15_end_mask_0, x = var_3068_cast_fp16)[name = string("x2_15")]; fp16 const_62_promoted = const()[name = string("const_62_promoted"), val = fp16(-0x1p+0)]; tensor var_3120 = mul(x = x2_15, y = const_62_promoted)[name = string("op_3120")]; int32 var_3122 = const()[name = string("op_3122"), val = int32(-1)]; bool var_3123_interleave_0 = const()[name = string("op_3123_interleave_0"), val = bool(false)]; tensor var_3123 = concat(axis = var_3122, interleave = var_3123_interleave_0, values = (var_3120, x1_15))[name = string("op_3123")]; tensor var_3124 = mul(x = var_3123, y = sin_1_cast_fp16)[name = string("op_3124")]; tensor key_states_13 = add(x = var_3099, y = var_3124)[name = string("key_states_13")]; tensor expand_dims_36 = const()[name = string("expand_dims_36"), val = tensor([3])]; tensor expand_dims_37 = const()[name = string("expand_dims_37"), val = tensor([0])]; tensor expand_dims_39 = const()[name = string("expand_dims_39"), val = tensor([0])]; tensor expand_dims_40 = const()[name = string("expand_dims_40"), val = tensor([4])]; int32 concat_26_axis_0 = const()[name = string("concat_26_axis_0"), val = int32(0)]; bool concat_26_interleave_0 = const()[name = string("concat_26_interleave_0"), val = bool(false)]; tensor concat_26 = concat(axis = concat_26_axis_0, interleave = concat_26_interleave_0, values = (expand_dims_36, expand_dims_37, current_pos, expand_dims_39))[name = string("concat_26")]; tensor concat_27_values1_0 = const()[name = string("concat_27_values1_0"), val = tensor([0])]; tensor concat_27_values3_0 = const()[name = string("concat_27_values3_0"), val = tensor([0])]; int32 concat_27_axis_0 = const()[name = string("concat_27_axis_0"), val = int32(0)]; bool concat_27_interleave_0 = const()[name = string("concat_27_interleave_0"), val = bool(false)]; tensor concat_27 = concat(axis = concat_27_axis_0, interleave = concat_27_interleave_0, values = (expand_dims_40, concat_27_values1_0, var_1725, concat_27_values3_0))[name = string("concat_27")]; tensor model_model_kv_cache_0_internal_tensor_assign_7_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_7_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_7_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_7_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_7_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_7_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_7_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_7_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_7_cast_fp16 = slice_update(begin = concat_26, begin_mask = model_model_kv_cache_0_internal_tensor_assign_7_begin_mask_0, end = concat_27, end_mask = model_model_kv_cache_0_internal_tensor_assign_7_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_7_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_7_stride_0, update = key_states_13, x = coreml_update_state_61)[name = string("model_model_kv_cache_0_internal_tensor_assign_7_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_7_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_6_write_state")]; tensor coreml_update_state_62 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_6")]; tensor expand_dims_42 = const()[name = string("expand_dims_42"), val = tensor([31])]; tensor expand_dims_43 = const()[name = string("expand_dims_43"), val = tensor([0])]; tensor expand_dims_45 = const()[name = string("expand_dims_45"), val = tensor([0])]; tensor expand_dims_46 = const()[name = string("expand_dims_46"), val = tensor([32])]; int32 concat_30_axis_0 = const()[name = string("concat_30_axis_0"), val = int32(0)]; bool concat_30_interleave_0 = const()[name = string("concat_30_interleave_0"), val = bool(false)]; tensor concat_30 = concat(axis = concat_30_axis_0, interleave = concat_30_interleave_0, values = (expand_dims_42, expand_dims_43, current_pos, expand_dims_45))[name = string("concat_30")]; tensor concat_31_values1_0 = const()[name = string("concat_31_values1_0"), val = tensor([0])]; tensor concat_31_values3_0 = const()[name = string("concat_31_values3_0"), val = tensor([0])]; int32 concat_31_axis_0 = const()[name = string("concat_31_axis_0"), val = int32(0)]; bool concat_31_interleave_0 = const()[name = string("concat_31_interleave_0"), val = bool(false)]; tensor concat_31 = concat(axis = concat_31_axis_0, interleave = concat_31_interleave_0, values = (expand_dims_46, concat_31_values1_0, var_1725, concat_31_values3_0))[name = string("concat_31")]; tensor model_model_kv_cache_0_internal_tensor_assign_8_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_8_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_8_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_8_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_8_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_8_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_8_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_8_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_8_cast_fp16 = slice_update(begin = concat_30, begin_mask = model_model_kv_cache_0_internal_tensor_assign_8_begin_mask_0, end = concat_31, end_mask = model_model_kv_cache_0_internal_tensor_assign_8_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_8_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_8_stride_0, update = var_3029, x = coreml_update_state_62)[name = string("model_model_kv_cache_0_internal_tensor_assign_8_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_8_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_7_write_state")]; tensor coreml_update_state_63 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_7")]; tensor var_3179_begin_0 = const()[name = string("op_3179_begin_0"), val = tensor([3, 0, 0, 0])]; tensor var_3179_end_0 = const()[name = string("op_3179_end_0"), val = tensor([4, 8, 1024, 128])]; tensor var_3179_end_mask_0 = const()[name = string("op_3179_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_3179_cast_fp16 = slice_by_index(begin = var_3179_begin_0, end = var_3179_end_0, end_mask = var_3179_end_mask_0, x = coreml_update_state_63)[name = string("op_3179_cast_fp16")]; tensor K_layer_cache_7_axes_0 = const()[name = string("K_layer_cache_7_axes_0"), val = tensor([0])]; tensor K_layer_cache_7_cast_fp16 = squeeze(axes = K_layer_cache_7_axes_0, x = var_3179_cast_fp16)[name = string("K_layer_cache_7_cast_fp16")]; tensor var_3186_begin_0 = const()[name = string("op_3186_begin_0"), val = tensor([31, 0, 0, 0])]; tensor var_3186_end_0 = const()[name = string("op_3186_end_0"), val = tensor([32, 8, 1024, 128])]; tensor var_3186_end_mask_0 = const()[name = string("op_3186_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_3186_cast_fp16 = slice_by_index(begin = var_3186_begin_0, end = var_3186_end_0, end_mask = var_3186_end_mask_0, x = coreml_update_state_63)[name = string("op_3186_cast_fp16")]; tensor V_layer_cache_7_axes_0 = const()[name = string("V_layer_cache_7_axes_0"), val = tensor([0])]; tensor V_layer_cache_7_cast_fp16 = squeeze(axes = V_layer_cache_7_axes_0, x = var_3186_cast_fp16)[name = string("V_layer_cache_7_cast_fp16")]; tensor x_67_axes_0 = const()[name = string("x_67_axes_0"), val = tensor([1])]; tensor x_67_cast_fp16 = expand_dims(axes = x_67_axes_0, x = K_layer_cache_7_cast_fp16)[name = string("x_67_cast_fp16")]; tensor var_3223 = const()[name = string("op_3223"), val = tensor([1, 2, 1, 1])]; tensor x_69_cast_fp16 = tile(reps = var_3223, x = x_67_cast_fp16)[name = string("x_69_cast_fp16")]; tensor var_3235 = const()[name = string("op_3235"), val = tensor([1, -1, 1024, 128])]; tensor key_states_15_cast_fp16 = reshape(shape = var_3235, x = x_69_cast_fp16)[name = string("key_states_15_cast_fp16")]; tensor x_73_axes_0 = const()[name = string("x_73_axes_0"), val = tensor([1])]; tensor x_73_cast_fp16 = expand_dims(axes = x_73_axes_0, x = V_layer_cache_7_cast_fp16)[name = string("x_73_cast_fp16")]; tensor var_3243 = const()[name = string("op_3243"), val = tensor([1, 2, 1, 1])]; tensor x_75_cast_fp16 = tile(reps = var_3243, x = x_73_cast_fp16)[name = string("x_75_cast_fp16")]; tensor var_3255 = const()[name = string("op_3255"), val = tensor([1, -1, 1024, 128])]; tensor value_states_21_cast_fp16 = reshape(shape = var_3255, x = x_75_cast_fp16)[name = string("value_states_21_cast_fp16")]; bool var_3270_transpose_x_1 = const()[name = string("op_3270_transpose_x_1"), val = bool(false)]; bool var_3270_transpose_y_1 = const()[name = string("op_3270_transpose_y_1"), val = bool(true)]; tensor var_3270 = matmul(transpose_x = var_3270_transpose_x_1, transpose_y = var_3270_transpose_y_1, x = query_states_13, y = key_states_15_cast_fp16)[name = string("op_3270")]; fp16 var_3271_to_fp16 = const()[name = string("op_3271_to_fp16"), val = fp16(0x1.6ap-4)]; tensor attn_weights_19_cast_fp16 = mul(x = var_3270, y = var_3271_to_fp16)[name = string("attn_weights_19_cast_fp16")]; tensor attn_weights_21_cast_fp16 = add(x = attn_weights_19_cast_fp16, y = causal_mask)[name = string("attn_weights_21_cast_fp16")]; int32 var_3306 = const()[name = string("op_3306"), val = int32(-1)]; tensor attn_weights_23_cast_fp16 = softmax(axis = var_3306, x = attn_weights_21_cast_fp16)[name = string("attn_weights_23_cast_fp16")]; bool attn_output_31_transpose_x_0 = const()[name = string("attn_output_31_transpose_x_0"), val = bool(false)]; bool attn_output_31_transpose_y_0 = const()[name = string("attn_output_31_transpose_y_0"), val = bool(false)]; tensor attn_output_31_cast_fp16 = matmul(transpose_x = attn_output_31_transpose_x_0, transpose_y = attn_output_31_transpose_y_0, x = attn_weights_23_cast_fp16, y = value_states_21_cast_fp16)[name = string("attn_output_31_cast_fp16")]; tensor var_3317_perm_0 = const()[name = string("op_3317_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_3321 = const()[name = string("op_3321"), val = tensor([1, 1, 2048])]; tensor var_3317_cast_fp16 = transpose(perm = var_3317_perm_0, x = attn_output_31_cast_fp16)[name = string("transpose_148")]; tensor attn_output_35_cast_fp16 = reshape(shape = var_3321, x = var_3317_cast_fp16)[name = string("attn_output_35_cast_fp16")]; tensor var_3326 = const()[name = string("op_3326"), val = tensor([0, 2, 1])]; string var_3342_pad_type_0 = const()[name = string("op_3342_pad_type_0"), val = string("valid")]; int32 var_3342_groups_0 = const()[name = string("op_3342_groups_0"), val = int32(1)]; tensor var_3342_strides_0 = const()[name = string("op_3342_strides_0"), val = tensor([1])]; tensor var_3342_pad_0 = const()[name = string("op_3342_pad_0"), val = tensor([0, 0])]; tensor var_3342_dilations_0 = const()[name = string("op_3342_dilations_0"), val = tensor([1])]; tensor squeeze_3_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1020285440))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1023431232))))[name = string("squeeze_3_cast_fp16_to_fp32_to_fp16_palettized")]; tensor var_3327_cast_fp16 = transpose(perm = var_3326, x = attn_output_35_cast_fp16)[name = string("transpose_147")]; tensor var_3342_cast_fp16 = conv(dilations = var_3342_dilations_0, groups = var_3342_groups_0, pad = var_3342_pad_0, pad_type = var_3342_pad_type_0, strides = var_3342_strides_0, weight = squeeze_3_cast_fp16_to_fp32_to_fp16_palettized, x = var_3327_cast_fp16)[name = string("op_3342_cast_fp16")]; tensor var_3346 = const()[name = string("op_3346"), val = tensor([0, 2, 1])]; tensor attn_output_39_cast_fp16 = transpose(perm = var_3346, x = var_3342_cast_fp16)[name = string("transpose_146")]; tensor hidden_states_23_cast_fp16 = add(x = hidden_states_19_cast_fp16, y = attn_output_39_cast_fp16)[name = string("hidden_states_23_cast_fp16")]; tensor mean_31_axes_0 = const()[name = string("mean_31_axes_0"), val = tensor([-1])]; bool mean_31_keep_dims_0 = const()[name = string("mean_31_keep_dims_0"), val = bool(true)]; tensor mean_31_cast_fp16 = reduce_mean(axes = mean_31_axes_0, keep_dims = mean_31_keep_dims_0, x = hidden_states_23_cast_fp16)[name = string("mean_31_cast_fp16")]; tensor input_65_cast_fp16 = sub(x = hidden_states_23_cast_fp16, y = mean_31_cast_fp16)[name = string("input_65_cast_fp16")]; tensor var_3365_axes_0 = const()[name = string("op_3365_axes_0"), val = tensor([-1])]; tensor model_model_layers_3_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_3_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1023464064)))]; fp16 var_3353_to_fp16 = const()[name = string("op_3353_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_3365_cast_fp16 = layer_norm(axes = var_3365_axes_0, epsilon = var_3353_to_fp16, gamma = model_model_layers_3_post_attention_layernorm_weight_to_fp16, x = input_65_cast_fp16)[name = string("op_3365_cast_fp16")]; tensor var_3379 = const()[name = string("op_3379"), val = tensor([0, 2, 1])]; tensor input_67_axes_0 = const()[name = string("input_67_axes_0"), val = tensor([2])]; tensor var_3380 = transpose(perm = var_3379, x = var_3365_cast_fp16)[name = string("transpose_145")]; tensor input_67 = expand_dims(axes = input_67_axes_0, x = var_3380)[name = string("input_67")]; string input_69_pad_type_0 = const()[name = string("input_69_pad_type_0"), val = string("valid")]; tensor input_69_strides_0 = const()[name = string("input_69_strides_0"), val = tensor([1, 1])]; tensor input_69_pad_0 = const()[name = string("input_69_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_69_dilations_0 = const()[name = string("input_69_dilations_0"), val = tensor([1, 1])]; int32 input_69_groups_0 = const()[name = string("input_69_groups_0"), val = int32(1)]; tensor input_69 = conv(dilations = input_69_dilations_0, groups = input_69_groups_0, pad = input_69_pad_0, pad_type = input_69_pad_type_0, strides = input_69_strides_0, weight = model_model_layers_3_mlp_gate_proj_weight_palettized, x = input_67)[name = string("input_69")]; string b_7_pad_type_0 = const()[name = string("b_7_pad_type_0"), val = string("valid")]; tensor b_7_strides_0 = const()[name = string("b_7_strides_0"), val = tensor([1, 1])]; tensor b_7_pad_0 = const()[name = string("b_7_pad_0"), val = tensor([0, 0, 0, 0])]; tensor b_7_dilations_0 = const()[name = string("b_7_dilations_0"), val = tensor([1, 1])]; int32 b_7_groups_0 = const()[name = string("b_7_groups_0"), val = int32(1)]; tensor b_7 = conv(dilations = b_7_dilations_0, groups = b_7_groups_0, pad = b_7_pad_0, pad_type = b_7_pad_type_0, strides = b_7_strides_0, weight = model_model_layers_3_mlp_up_proj_weight_palettized, x = input_67)[name = string("b_7")]; tensor c_7 = silu(x = input_69)[name = string("c_7")]; tensor input_71 = mul(x = c_7, y = b_7)[name = string("input_71")]; string e_7_pad_type_0 = const()[name = string("e_7_pad_type_0"), val = string("valid")]; tensor e_7_strides_0 = const()[name = string("e_7_strides_0"), val = tensor([1, 1])]; tensor e_7_pad_0 = const()[name = string("e_7_pad_0"), val = tensor([0, 0, 0, 0])]; tensor e_7_dilations_0 = const()[name = string("e_7_dilations_0"), val = tensor([1, 1])]; int32 e_7_groups_0 = const()[name = string("e_7_groups_0"), val = int32(1)]; tensor e_7 = conv(dilations = e_7_dilations_0, groups = e_7_groups_0, pad = e_7_pad_0, pad_type = e_7_pad_type_0, strides = e_7_strides_0, weight = model_model_layers_3_mlp_down_proj_weight_palettized, x = input_71)[name = string("e_7")]; tensor var_3402_axes_0 = const()[name = string("op_3402_axes_0"), val = tensor([2])]; tensor var_3402 = squeeze(axes = var_3402_axes_0, x = e_7)[name = string("op_3402")]; tensor var_3403 = const()[name = string("op_3403"), val = tensor([0, 2, 1])]; tensor var_3404 = transpose(perm = var_3403, x = var_3402)[name = string("transpose_144")]; tensor hidden_states_25_cast_fp16 = add(x = hidden_states_23_cast_fp16, y = var_3404)[name = string("hidden_states_25_cast_fp16")]; tensor mean_33_axes_0 = const()[name = string("mean_33_axes_0"), val = tensor([-1])]; bool mean_33_keep_dims_0 = const()[name = string("mean_33_keep_dims_0"), val = bool(true)]; tensor mean_33_cast_fp16 = reduce_mean(axes = mean_33_axes_0, keep_dims = mean_33_keep_dims_0, x = hidden_states_25_cast_fp16)[name = string("mean_33_cast_fp16")]; tensor input_73_cast_fp16 = sub(x = hidden_states_25_cast_fp16, y = mean_33_cast_fp16)[name = string("input_73_cast_fp16")]; tensor var_3422_axes_0 = const()[name = string("op_3422_axes_0"), val = tensor([-1])]; tensor model_model_layers_4_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_4_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1023468224)))]; fp16 var_3410_to_fp16 = const()[name = string("op_3410_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_3422_cast_fp16 = layer_norm(axes = var_3422_axes_0, epsilon = var_3410_to_fp16, gamma = model_model_layers_4_input_layernorm_weight_to_fp16, x = input_73_cast_fp16)[name = string("op_3422_cast_fp16")]; tensor var_3428 = const()[name = string("op_3428"), val = tensor([0, 2, 1])]; tensor var_3431_axes_0 = const()[name = string("op_3431_axes_0"), val = tensor([2])]; tensor var_3429 = transpose(perm = var_3428, x = var_3422_cast_fp16)[name = string("transpose_143")]; tensor var_3431 = expand_dims(axes = var_3431_axes_0, x = var_3429)[name = string("op_3431")]; string var_3447_pad_type_0 = const()[name = string("op_3447_pad_type_0"), val = string("valid")]; tensor var_3447_strides_0 = const()[name = string("op_3447_strides_0"), val = tensor([1, 1])]; tensor var_3447_pad_0 = const()[name = string("op_3447_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_3447_dilations_0 = const()[name = string("op_3447_dilations_0"), val = tensor([1, 1])]; int32 var_3447_groups_0 = const()[name = string("op_3447_groups_0"), val = int32(1)]; tensor var_3447 = conv(dilations = var_3447_dilations_0, groups = var_3447_groups_0, pad = var_3447_pad_0, pad_type = var_3447_pad_type_0, strides = var_3447_strides_0, weight = model_model_layers_4_self_attn_q_proj_weight_palettized, x = var_3431)[name = string("op_3447")]; tensor var_3452 = const()[name = string("op_3452"), val = tensor([1, 16, 1, 128])]; tensor var_3453 = reshape(shape = var_3452, x = var_3447)[name = string("op_3453")]; string var_3469_pad_type_0 = const()[name = string("op_3469_pad_type_0"), val = string("valid")]; tensor var_3469_strides_0 = const()[name = string("op_3469_strides_0"), val = tensor([1, 1])]; tensor var_3469_pad_0 = const()[name = string("op_3469_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_3469_dilations_0 = const()[name = string("op_3469_dilations_0"), val = tensor([1, 1])]; int32 var_3469_groups_0 = const()[name = string("op_3469_groups_0"), val = int32(1)]; tensor var_3469 = conv(dilations = var_3469_dilations_0, groups = var_3469_groups_0, pad = var_3469_pad_0, pad_type = var_3469_pad_type_0, strides = var_3469_strides_0, weight = model_model_layers_4_self_attn_k_proj_weight_palettized, x = var_3431)[name = string("op_3469")]; tensor var_3474 = const()[name = string("op_3474"), val = tensor([1, 8, 1, 128])]; tensor var_3475 = reshape(shape = var_3474, x = var_3469)[name = string("op_3475")]; string var_3491_pad_type_0 = const()[name = string("op_3491_pad_type_0"), val = string("valid")]; tensor var_3491_strides_0 = const()[name = string("op_3491_strides_0"), val = tensor([1, 1])]; tensor var_3491_pad_0 = const()[name = string("op_3491_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_3491_dilations_0 = const()[name = string("op_3491_dilations_0"), val = tensor([1, 1])]; int32 var_3491_groups_0 = const()[name = string("op_3491_groups_0"), val = int32(1)]; tensor var_3491 = conv(dilations = var_3491_dilations_0, groups = var_3491_groups_0, pad = var_3491_pad_0, pad_type = var_3491_pad_type_0, strides = var_3491_strides_0, weight = model_model_layers_4_self_attn_v_proj_weight_palettized, x = var_3431)[name = string("op_3491")]; tensor var_3496 = const()[name = string("op_3496"), val = tensor([1, 8, 1, 128])]; tensor var_3497 = reshape(shape = var_3496, x = var_3491)[name = string("op_3497")]; tensor mean_35_axes_0 = const()[name = string("mean_35_axes_0"), val = tensor([-1])]; bool mean_35_keep_dims_0 = const()[name = string("mean_35_keep_dims_0"), val = bool(true)]; tensor mean_35 = reduce_mean(axes = mean_35_axes_0, keep_dims = mean_35_keep_dims_0, x = var_3453)[name = string("mean_35")]; tensor input_77 = sub(x = var_3453, y = mean_35)[name = string("input_77")]; tensor var_3518_axes_0 = const()[name = string("op_3518_axes_0"), val = tensor([-1])]; tensor model_model_layers_4_self_attn_q_norm_weight_to_fp16 = const()[name = string("model_model_layers_4_self_attn_q_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1023472384)))]; fp16 var_3506_to_fp16 = const()[name = string("op_3506_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_3518_cast_fp16 = layer_norm(axes = var_3518_axes_0, epsilon = var_3506_to_fp16, gamma = model_model_layers_4_self_attn_q_norm_weight_to_fp16, x = input_77)[name = string("op_3518_cast_fp16")]; tensor mean_37_axes_0 = const()[name = string("mean_37_axes_0"), val = tensor([-1])]; bool mean_37_keep_dims_0 = const()[name = string("mean_37_keep_dims_0"), val = bool(true)]; tensor mean_37 = reduce_mean(axes = mean_37_axes_0, keep_dims = mean_37_keep_dims_0, x = var_3475)[name = string("mean_37")]; tensor input_79 = sub(x = var_3475, y = mean_37)[name = string("input_79")]; tensor var_3536_axes_0 = const()[name = string("op_3536_axes_0"), val = tensor([-1])]; tensor model_model_layers_4_self_attn_k_norm_weight_to_fp16 = const()[name = string("model_model_layers_4_self_attn_k_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1023472704)))]; fp16 var_3524_to_fp16 = const()[name = string("op_3524_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_3536_cast_fp16 = layer_norm(axes = var_3536_axes_0, epsilon = var_3524_to_fp16, gamma = model_model_layers_4_self_attn_k_norm_weight_to_fp16, x = input_79)[name = string("op_3536_cast_fp16")]; tensor var_3539 = mul(x = var_3518_cast_fp16, y = cos_1_cast_fp16)[name = string("op_3539")]; tensor x1_17_begin_0 = const()[name = string("x1_17_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_17_end_0 = const()[name = string("x1_17_end_0"), val = tensor([1, 16, 1, 64])]; tensor x1_17_end_mask_0 = const()[name = string("x1_17_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_17 = slice_by_index(begin = x1_17_begin_0, end = x1_17_end_0, end_mask = x1_17_end_mask_0, x = var_3518_cast_fp16)[name = string("x1_17")]; tensor x2_17_begin_0 = const()[name = string("x2_17_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_17_end_0 = const()[name = string("x2_17_end_0"), val = tensor([1, 16, 1, 128])]; tensor x2_17_end_mask_0 = const()[name = string("x2_17_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_17 = slice_by_index(begin = x2_17_begin_0, end = x2_17_end_0, end_mask = x2_17_end_mask_0, x = var_3518_cast_fp16)[name = string("x2_17")]; fp16 const_77_promoted = const()[name = string("const_77_promoted"), val = fp16(-0x1p+0)]; tensor var_3560 = mul(x = x2_17, y = const_77_promoted)[name = string("op_3560")]; int32 var_3562 = const()[name = string("op_3562"), val = int32(-1)]; bool var_3563_interleave_0 = const()[name = string("op_3563_interleave_0"), val = bool(false)]; tensor var_3563 = concat(axis = var_3562, interleave = var_3563_interleave_0, values = (var_3560, x1_17))[name = string("op_3563")]; tensor var_3564 = mul(x = var_3563, y = sin_1_cast_fp16)[name = string("op_3564")]; tensor query_states_17 = add(x = var_3539, y = var_3564)[name = string("query_states_17")]; tensor var_3567 = mul(x = var_3536_cast_fp16, y = cos_1_cast_fp16)[name = string("op_3567")]; tensor x1_19_begin_0 = const()[name = string("x1_19_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_19_end_0 = const()[name = string("x1_19_end_0"), val = tensor([1, 8, 1, 64])]; tensor x1_19_end_mask_0 = const()[name = string("x1_19_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_19 = slice_by_index(begin = x1_19_begin_0, end = x1_19_end_0, end_mask = x1_19_end_mask_0, x = var_3536_cast_fp16)[name = string("x1_19")]; tensor x2_19_begin_0 = const()[name = string("x2_19_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_19_end_0 = const()[name = string("x2_19_end_0"), val = tensor([1, 8, 1, 128])]; tensor x2_19_end_mask_0 = const()[name = string("x2_19_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_19 = slice_by_index(begin = x2_19_begin_0, end = x2_19_end_0, end_mask = x2_19_end_mask_0, x = var_3536_cast_fp16)[name = string("x2_19")]; fp16 const_80_promoted = const()[name = string("const_80_promoted"), val = fp16(-0x1p+0)]; tensor var_3588 = mul(x = x2_19, y = const_80_promoted)[name = string("op_3588")]; int32 var_3590 = const()[name = string("op_3590"), val = int32(-1)]; bool var_3591_interleave_0 = const()[name = string("op_3591_interleave_0"), val = bool(false)]; tensor var_3591 = concat(axis = var_3590, interleave = var_3591_interleave_0, values = (var_3588, x1_19))[name = string("op_3591")]; tensor var_3592 = mul(x = var_3591, y = sin_1_cast_fp16)[name = string("op_3592")]; tensor key_states_17 = add(x = var_3567, y = var_3592)[name = string("key_states_17")]; tensor expand_dims_48 = const()[name = string("expand_dims_48"), val = tensor([4])]; tensor expand_dims_49 = const()[name = string("expand_dims_49"), val = tensor([0])]; tensor expand_dims_51 = const()[name = string("expand_dims_51"), val = tensor([0])]; tensor expand_dims_52 = const()[name = string("expand_dims_52"), val = tensor([5])]; int32 concat_34_axis_0 = const()[name = string("concat_34_axis_0"), val = int32(0)]; bool concat_34_interleave_0 = const()[name = string("concat_34_interleave_0"), val = bool(false)]; tensor concat_34 = concat(axis = concat_34_axis_0, interleave = concat_34_interleave_0, values = (expand_dims_48, expand_dims_49, current_pos, expand_dims_51))[name = string("concat_34")]; tensor concat_35_values1_0 = const()[name = string("concat_35_values1_0"), val = tensor([0])]; tensor concat_35_values3_0 = const()[name = string("concat_35_values3_0"), val = tensor([0])]; int32 concat_35_axis_0 = const()[name = string("concat_35_axis_0"), val = int32(0)]; bool concat_35_interleave_0 = const()[name = string("concat_35_interleave_0"), val = bool(false)]; tensor concat_35 = concat(axis = concat_35_axis_0, interleave = concat_35_interleave_0, values = (expand_dims_52, concat_35_values1_0, var_1725, concat_35_values3_0))[name = string("concat_35")]; tensor model_model_kv_cache_0_internal_tensor_assign_9_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_9_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_9_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_9_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_9_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_9_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_9_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_9_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_9_cast_fp16 = slice_update(begin = concat_34, begin_mask = model_model_kv_cache_0_internal_tensor_assign_9_begin_mask_0, end = concat_35, end_mask = model_model_kv_cache_0_internal_tensor_assign_9_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_9_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_9_stride_0, update = key_states_17, x = coreml_update_state_63)[name = string("model_model_kv_cache_0_internal_tensor_assign_9_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_9_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_8_write_state")]; tensor coreml_update_state_64 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_8")]; tensor expand_dims_54 = const()[name = string("expand_dims_54"), val = tensor([32])]; tensor expand_dims_55 = const()[name = string("expand_dims_55"), val = tensor([0])]; tensor expand_dims_57 = const()[name = string("expand_dims_57"), val = tensor([0])]; tensor expand_dims_58 = const()[name = string("expand_dims_58"), val = tensor([33])]; int32 concat_38_axis_0 = const()[name = string("concat_38_axis_0"), val = int32(0)]; bool concat_38_interleave_0 = const()[name = string("concat_38_interleave_0"), val = bool(false)]; tensor concat_38 = concat(axis = concat_38_axis_0, interleave = concat_38_interleave_0, values = (expand_dims_54, expand_dims_55, current_pos, expand_dims_57))[name = string("concat_38")]; tensor concat_39_values1_0 = const()[name = string("concat_39_values1_0"), val = tensor([0])]; tensor concat_39_values3_0 = const()[name = string("concat_39_values3_0"), val = tensor([0])]; int32 concat_39_axis_0 = const()[name = string("concat_39_axis_0"), val = int32(0)]; bool concat_39_interleave_0 = const()[name = string("concat_39_interleave_0"), val = bool(false)]; tensor concat_39 = concat(axis = concat_39_axis_0, interleave = concat_39_interleave_0, values = (expand_dims_58, concat_39_values1_0, var_1725, concat_39_values3_0))[name = string("concat_39")]; tensor model_model_kv_cache_0_internal_tensor_assign_10_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_10_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_10_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_10_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_10_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_10_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_10_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_10_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_10_cast_fp16 = slice_update(begin = concat_38, begin_mask = model_model_kv_cache_0_internal_tensor_assign_10_begin_mask_0, end = concat_39, end_mask = model_model_kv_cache_0_internal_tensor_assign_10_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_10_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_10_stride_0, update = var_3497, x = coreml_update_state_64)[name = string("model_model_kv_cache_0_internal_tensor_assign_10_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_10_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_9_write_state")]; tensor coreml_update_state_65 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_9")]; tensor var_3647_begin_0 = const()[name = string("op_3647_begin_0"), val = tensor([4, 0, 0, 0])]; tensor var_3647_end_0 = const()[name = string("op_3647_end_0"), val = tensor([5, 8, 1024, 128])]; tensor var_3647_end_mask_0 = const()[name = string("op_3647_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_3647_cast_fp16 = slice_by_index(begin = var_3647_begin_0, end = var_3647_end_0, end_mask = var_3647_end_mask_0, x = coreml_update_state_65)[name = string("op_3647_cast_fp16")]; tensor K_layer_cache_9_axes_0 = const()[name = string("K_layer_cache_9_axes_0"), val = tensor([0])]; tensor K_layer_cache_9_cast_fp16 = squeeze(axes = K_layer_cache_9_axes_0, x = var_3647_cast_fp16)[name = string("K_layer_cache_9_cast_fp16")]; tensor var_3654_begin_0 = const()[name = string("op_3654_begin_0"), val = tensor([32, 0, 0, 0])]; tensor var_3654_end_0 = const()[name = string("op_3654_end_0"), val = tensor([33, 8, 1024, 128])]; tensor var_3654_end_mask_0 = const()[name = string("op_3654_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_3654_cast_fp16 = slice_by_index(begin = var_3654_begin_0, end = var_3654_end_0, end_mask = var_3654_end_mask_0, x = coreml_update_state_65)[name = string("op_3654_cast_fp16")]; tensor V_layer_cache_9_axes_0 = const()[name = string("V_layer_cache_9_axes_0"), val = tensor([0])]; tensor V_layer_cache_9_cast_fp16 = squeeze(axes = V_layer_cache_9_axes_0, x = var_3654_cast_fp16)[name = string("V_layer_cache_9_cast_fp16")]; tensor x_87_axes_0 = const()[name = string("x_87_axes_0"), val = tensor([1])]; tensor x_87_cast_fp16 = expand_dims(axes = x_87_axes_0, x = K_layer_cache_9_cast_fp16)[name = string("x_87_cast_fp16")]; tensor var_3691 = const()[name = string("op_3691"), val = tensor([1, 2, 1, 1])]; tensor x_89_cast_fp16 = tile(reps = var_3691, x = x_87_cast_fp16)[name = string("x_89_cast_fp16")]; tensor var_3703 = const()[name = string("op_3703"), val = tensor([1, -1, 1024, 128])]; tensor key_states_19_cast_fp16 = reshape(shape = var_3703, x = x_89_cast_fp16)[name = string("key_states_19_cast_fp16")]; tensor x_93_axes_0 = const()[name = string("x_93_axes_0"), val = tensor([1])]; tensor x_93_cast_fp16 = expand_dims(axes = x_93_axes_0, x = V_layer_cache_9_cast_fp16)[name = string("x_93_cast_fp16")]; tensor var_3711 = const()[name = string("op_3711"), val = tensor([1, 2, 1, 1])]; tensor x_95_cast_fp16 = tile(reps = var_3711, x = x_93_cast_fp16)[name = string("x_95_cast_fp16")]; tensor var_3723 = const()[name = string("op_3723"), val = tensor([1, -1, 1024, 128])]; tensor value_states_27_cast_fp16 = reshape(shape = var_3723, x = x_95_cast_fp16)[name = string("value_states_27_cast_fp16")]; bool var_3738_transpose_x_1 = const()[name = string("op_3738_transpose_x_1"), val = bool(false)]; bool var_3738_transpose_y_1 = const()[name = string("op_3738_transpose_y_1"), val = bool(true)]; tensor var_3738 = matmul(transpose_x = var_3738_transpose_x_1, transpose_y = var_3738_transpose_y_1, x = query_states_17, y = key_states_19_cast_fp16)[name = string("op_3738")]; fp16 var_3739_to_fp16 = const()[name = string("op_3739_to_fp16"), val = fp16(0x1.6ap-4)]; tensor attn_weights_25_cast_fp16 = mul(x = var_3738, y = var_3739_to_fp16)[name = string("attn_weights_25_cast_fp16")]; tensor attn_weights_27_cast_fp16 = add(x = attn_weights_25_cast_fp16, y = causal_mask)[name = string("attn_weights_27_cast_fp16")]; int32 var_3774 = const()[name = string("op_3774"), val = int32(-1)]; tensor attn_weights_29_cast_fp16 = softmax(axis = var_3774, x = attn_weights_27_cast_fp16)[name = string("attn_weights_29_cast_fp16")]; bool attn_output_41_transpose_x_0 = const()[name = string("attn_output_41_transpose_x_0"), val = bool(false)]; bool attn_output_41_transpose_y_0 = const()[name = string("attn_output_41_transpose_y_0"), val = bool(false)]; tensor attn_output_41_cast_fp16 = matmul(transpose_x = attn_output_41_transpose_x_0, transpose_y = attn_output_41_transpose_y_0, x = attn_weights_29_cast_fp16, y = value_states_27_cast_fp16)[name = string("attn_output_41_cast_fp16")]; tensor var_3785_perm_0 = const()[name = string("op_3785_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_3789 = const()[name = string("op_3789"), val = tensor([1, 1, 2048])]; tensor var_3785_cast_fp16 = transpose(perm = var_3785_perm_0, x = attn_output_41_cast_fp16)[name = string("transpose_142")]; tensor attn_output_45_cast_fp16 = reshape(shape = var_3789, x = var_3785_cast_fp16)[name = string("attn_output_45_cast_fp16")]; tensor var_3794 = const()[name = string("op_3794"), val = tensor([0, 2, 1])]; string var_3810_pad_type_0 = const()[name = string("op_3810_pad_type_0"), val = string("valid")]; int32 var_3810_groups_0 = const()[name = string("op_3810_groups_0"), val = int32(1)]; tensor var_3810_strides_0 = const()[name = string("op_3810_strides_0"), val = tensor([1])]; tensor var_3810_pad_0 = const()[name = string("op_3810_pad_0"), val = tensor([0, 0])]; tensor var_3810_dilations_0 = const()[name = string("op_3810_dilations_0"), val = tensor([1])]; tensor squeeze_4_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1023473024))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1026618816))))[name = string("squeeze_4_cast_fp16_to_fp32_to_fp16_palettized")]; tensor var_3795_cast_fp16 = transpose(perm = var_3794, x = attn_output_45_cast_fp16)[name = string("transpose_141")]; tensor var_3810_cast_fp16 = conv(dilations = var_3810_dilations_0, groups = var_3810_groups_0, pad = var_3810_pad_0, pad_type = var_3810_pad_type_0, strides = var_3810_strides_0, weight = squeeze_4_cast_fp16_to_fp32_to_fp16_palettized, x = var_3795_cast_fp16)[name = string("op_3810_cast_fp16")]; tensor var_3814 = const()[name = string("op_3814"), val = tensor([0, 2, 1])]; tensor attn_output_49_cast_fp16 = transpose(perm = var_3814, x = var_3810_cast_fp16)[name = string("transpose_140")]; tensor hidden_states_29_cast_fp16 = add(x = hidden_states_25_cast_fp16, y = attn_output_49_cast_fp16)[name = string("hidden_states_29_cast_fp16")]; tensor mean_39_axes_0 = const()[name = string("mean_39_axes_0"), val = tensor([-1])]; bool mean_39_keep_dims_0 = const()[name = string("mean_39_keep_dims_0"), val = bool(true)]; tensor mean_39_cast_fp16 = reduce_mean(axes = mean_39_axes_0, keep_dims = mean_39_keep_dims_0, x = hidden_states_29_cast_fp16)[name = string("mean_39_cast_fp16")]; tensor input_83_cast_fp16 = sub(x = hidden_states_29_cast_fp16, y = mean_39_cast_fp16)[name = string("input_83_cast_fp16")]; tensor var_3833_axes_0 = const()[name = string("op_3833_axes_0"), val = tensor([-1])]; tensor model_model_layers_4_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_4_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1026651648)))]; fp16 var_3821_to_fp16 = const()[name = string("op_3821_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_3833_cast_fp16 = layer_norm(axes = var_3833_axes_0, epsilon = var_3821_to_fp16, gamma = model_model_layers_4_post_attention_layernorm_weight_to_fp16, x = input_83_cast_fp16)[name = string("op_3833_cast_fp16")]; tensor var_3847 = const()[name = string("op_3847"), val = tensor([0, 2, 1])]; tensor input_85_axes_0 = const()[name = string("input_85_axes_0"), val = tensor([2])]; tensor var_3848 = transpose(perm = var_3847, x = var_3833_cast_fp16)[name = string("transpose_139")]; tensor input_85 = expand_dims(axes = input_85_axes_0, x = var_3848)[name = string("input_85")]; string input_87_pad_type_0 = const()[name = string("input_87_pad_type_0"), val = string("valid")]; tensor input_87_strides_0 = const()[name = string("input_87_strides_0"), val = tensor([1, 1])]; tensor input_87_pad_0 = const()[name = string("input_87_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_87_dilations_0 = const()[name = string("input_87_dilations_0"), val = tensor([1, 1])]; int32 input_87_groups_0 = const()[name = string("input_87_groups_0"), val = int32(1)]; tensor input_87 = conv(dilations = input_87_dilations_0, groups = input_87_groups_0, pad = input_87_pad_0, pad_type = input_87_pad_type_0, strides = input_87_strides_0, weight = model_model_layers_4_mlp_gate_proj_weight_palettized, x = input_85)[name = string("input_87")]; string b_9_pad_type_0 = const()[name = string("b_9_pad_type_0"), val = string("valid")]; tensor b_9_strides_0 = const()[name = string("b_9_strides_0"), val = tensor([1, 1])]; tensor b_9_pad_0 = const()[name = string("b_9_pad_0"), val = tensor([0, 0, 0, 0])]; tensor b_9_dilations_0 = const()[name = string("b_9_dilations_0"), val = tensor([1, 1])]; int32 b_9_groups_0 = const()[name = string("b_9_groups_0"), val = int32(1)]; tensor b_9 = conv(dilations = b_9_dilations_0, groups = b_9_groups_0, pad = b_9_pad_0, pad_type = b_9_pad_type_0, strides = b_9_strides_0, weight = model_model_layers_4_mlp_up_proj_weight_palettized, x = input_85)[name = string("b_9")]; tensor c_9 = silu(x = input_87)[name = string("c_9")]; tensor input_89 = mul(x = c_9, y = b_9)[name = string("input_89")]; string e_9_pad_type_0 = const()[name = string("e_9_pad_type_0"), val = string("valid")]; tensor e_9_strides_0 = const()[name = string("e_9_strides_0"), val = tensor([1, 1])]; tensor e_9_pad_0 = const()[name = string("e_9_pad_0"), val = tensor([0, 0, 0, 0])]; tensor e_9_dilations_0 = const()[name = string("e_9_dilations_0"), val = tensor([1, 1])]; int32 e_9_groups_0 = const()[name = string("e_9_groups_0"), val = int32(1)]; tensor e_9 = conv(dilations = e_9_dilations_0, groups = e_9_groups_0, pad = e_9_pad_0, pad_type = e_9_pad_type_0, strides = e_9_strides_0, weight = model_model_layers_4_mlp_down_proj_weight_palettized, x = input_89)[name = string("e_9")]; tensor var_3870_axes_0 = const()[name = string("op_3870_axes_0"), val = tensor([2])]; tensor var_3870 = squeeze(axes = var_3870_axes_0, x = e_9)[name = string("op_3870")]; tensor var_3871 = const()[name = string("op_3871"), val = tensor([0, 2, 1])]; tensor var_3872 = transpose(perm = var_3871, x = var_3870)[name = string("transpose_138")]; tensor hidden_states_31_cast_fp16 = add(x = hidden_states_29_cast_fp16, y = var_3872)[name = string("hidden_states_31_cast_fp16")]; tensor mean_41_axes_0 = const()[name = string("mean_41_axes_0"), val = tensor([-1])]; bool mean_41_keep_dims_0 = const()[name = string("mean_41_keep_dims_0"), val = bool(true)]; tensor mean_41_cast_fp16 = reduce_mean(axes = mean_41_axes_0, keep_dims = mean_41_keep_dims_0, x = hidden_states_31_cast_fp16)[name = string("mean_41_cast_fp16")]; tensor input_91_cast_fp16 = sub(x = hidden_states_31_cast_fp16, y = mean_41_cast_fp16)[name = string("input_91_cast_fp16")]; tensor var_3890_axes_0 = const()[name = string("op_3890_axes_0"), val = tensor([-1])]; tensor model_model_layers_5_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_5_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1026655808)))]; fp16 var_3878_to_fp16 = const()[name = string("op_3878_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_3890_cast_fp16 = layer_norm(axes = var_3890_axes_0, epsilon = var_3878_to_fp16, gamma = model_model_layers_5_input_layernorm_weight_to_fp16, x = input_91_cast_fp16)[name = string("op_3890_cast_fp16")]; tensor var_3896 = const()[name = string("op_3896"), val = tensor([0, 2, 1])]; tensor var_3899_axes_0 = const()[name = string("op_3899_axes_0"), val = tensor([2])]; tensor var_3897 = transpose(perm = var_3896, x = var_3890_cast_fp16)[name = string("transpose_137")]; tensor var_3899 = expand_dims(axes = var_3899_axes_0, x = var_3897)[name = string("op_3899")]; string var_3915_pad_type_0 = const()[name = string("op_3915_pad_type_0"), val = string("valid")]; tensor var_3915_strides_0 = const()[name = string("op_3915_strides_0"), val = tensor([1, 1])]; tensor var_3915_pad_0 = const()[name = string("op_3915_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_3915_dilations_0 = const()[name = string("op_3915_dilations_0"), val = tensor([1, 1])]; int32 var_3915_groups_0 = const()[name = string("op_3915_groups_0"), val = int32(1)]; tensor var_3915 = conv(dilations = var_3915_dilations_0, groups = var_3915_groups_0, pad = var_3915_pad_0, pad_type = var_3915_pad_type_0, strides = var_3915_strides_0, weight = model_model_layers_5_self_attn_q_proj_weight_palettized, x = var_3899)[name = string("op_3915")]; tensor var_3920 = const()[name = string("op_3920"), val = tensor([1, 16, 1, 128])]; tensor var_3921 = reshape(shape = var_3920, x = var_3915)[name = string("op_3921")]; string var_3937_pad_type_0 = const()[name = string("op_3937_pad_type_0"), val = string("valid")]; tensor var_3937_strides_0 = const()[name = string("op_3937_strides_0"), val = tensor([1, 1])]; tensor var_3937_pad_0 = const()[name = string("op_3937_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_3937_dilations_0 = const()[name = string("op_3937_dilations_0"), val = tensor([1, 1])]; int32 var_3937_groups_0 = const()[name = string("op_3937_groups_0"), val = int32(1)]; tensor var_3937 = conv(dilations = var_3937_dilations_0, groups = var_3937_groups_0, pad = var_3937_pad_0, pad_type = var_3937_pad_type_0, strides = var_3937_strides_0, weight = model_model_layers_5_self_attn_k_proj_weight_palettized, x = var_3899)[name = string("op_3937")]; tensor var_3942 = const()[name = string("op_3942"), val = tensor([1, 8, 1, 128])]; tensor var_3943 = reshape(shape = var_3942, x = var_3937)[name = string("op_3943")]; string var_3959_pad_type_0 = const()[name = string("op_3959_pad_type_0"), val = string("valid")]; tensor var_3959_strides_0 = const()[name = string("op_3959_strides_0"), val = tensor([1, 1])]; tensor var_3959_pad_0 = const()[name = string("op_3959_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_3959_dilations_0 = const()[name = string("op_3959_dilations_0"), val = tensor([1, 1])]; int32 var_3959_groups_0 = const()[name = string("op_3959_groups_0"), val = int32(1)]; tensor var_3959 = conv(dilations = var_3959_dilations_0, groups = var_3959_groups_0, pad = var_3959_pad_0, pad_type = var_3959_pad_type_0, strides = var_3959_strides_0, weight = model_model_layers_5_self_attn_v_proj_weight_palettized, x = var_3899)[name = string("op_3959")]; tensor var_3964 = const()[name = string("op_3964"), val = tensor([1, 8, 1, 128])]; tensor var_3965 = reshape(shape = var_3964, x = var_3959)[name = string("op_3965")]; tensor mean_43_axes_0 = const()[name = string("mean_43_axes_0"), val = tensor([-1])]; bool mean_43_keep_dims_0 = const()[name = string("mean_43_keep_dims_0"), val = bool(true)]; tensor mean_43 = reduce_mean(axes = mean_43_axes_0, keep_dims = mean_43_keep_dims_0, x = var_3921)[name = string("mean_43")]; tensor input_95 = sub(x = var_3921, y = mean_43)[name = string("input_95")]; tensor var_3986_axes_0 = const()[name = string("op_3986_axes_0"), val = tensor([-1])]; tensor model_model_layers_5_self_attn_q_norm_weight_to_fp16 = const()[name = string("model_model_layers_5_self_attn_q_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1026659968)))]; fp16 var_3974_to_fp16 = const()[name = string("op_3974_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_3986_cast_fp16 = layer_norm(axes = var_3986_axes_0, epsilon = var_3974_to_fp16, gamma = model_model_layers_5_self_attn_q_norm_weight_to_fp16, x = input_95)[name = string("op_3986_cast_fp16")]; tensor mean_45_axes_0 = const()[name = string("mean_45_axes_0"), val = tensor([-1])]; bool mean_45_keep_dims_0 = const()[name = string("mean_45_keep_dims_0"), val = bool(true)]; tensor mean_45 = reduce_mean(axes = mean_45_axes_0, keep_dims = mean_45_keep_dims_0, x = var_3943)[name = string("mean_45")]; tensor input_97 = sub(x = var_3943, y = mean_45)[name = string("input_97")]; tensor var_4004_axes_0 = const()[name = string("op_4004_axes_0"), val = tensor([-1])]; tensor model_model_layers_5_self_attn_k_norm_weight_to_fp16 = const()[name = string("model_model_layers_5_self_attn_k_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1026660288)))]; fp16 var_3992_to_fp16 = const()[name = string("op_3992_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_4004_cast_fp16 = layer_norm(axes = var_4004_axes_0, epsilon = var_3992_to_fp16, gamma = model_model_layers_5_self_attn_k_norm_weight_to_fp16, x = input_97)[name = string("op_4004_cast_fp16")]; tensor var_4007 = mul(x = var_3986_cast_fp16, y = cos_1_cast_fp16)[name = string("op_4007")]; tensor x1_21_begin_0 = const()[name = string("x1_21_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_21_end_0 = const()[name = string("x1_21_end_0"), val = tensor([1, 16, 1, 64])]; tensor x1_21_end_mask_0 = const()[name = string("x1_21_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_21 = slice_by_index(begin = x1_21_begin_0, end = x1_21_end_0, end_mask = x1_21_end_mask_0, x = var_3986_cast_fp16)[name = string("x1_21")]; tensor x2_21_begin_0 = const()[name = string("x2_21_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_21_end_0 = const()[name = string("x2_21_end_0"), val = tensor([1, 16, 1, 128])]; tensor x2_21_end_mask_0 = const()[name = string("x2_21_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_21 = slice_by_index(begin = x2_21_begin_0, end = x2_21_end_0, end_mask = x2_21_end_mask_0, x = var_3986_cast_fp16)[name = string("x2_21")]; fp16 const_95_promoted = const()[name = string("const_95_promoted"), val = fp16(-0x1p+0)]; tensor var_4028 = mul(x = x2_21, y = const_95_promoted)[name = string("op_4028")]; int32 var_4030 = const()[name = string("op_4030"), val = int32(-1)]; bool var_4031_interleave_0 = const()[name = string("op_4031_interleave_0"), val = bool(false)]; tensor var_4031 = concat(axis = var_4030, interleave = var_4031_interleave_0, values = (var_4028, x1_21))[name = string("op_4031")]; tensor var_4032 = mul(x = var_4031, y = sin_1_cast_fp16)[name = string("op_4032")]; tensor query_states_21 = add(x = var_4007, y = var_4032)[name = string("query_states_21")]; tensor var_4035 = mul(x = var_4004_cast_fp16, y = cos_1_cast_fp16)[name = string("op_4035")]; tensor x1_23_begin_0 = const()[name = string("x1_23_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_23_end_0 = const()[name = string("x1_23_end_0"), val = tensor([1, 8, 1, 64])]; tensor x1_23_end_mask_0 = const()[name = string("x1_23_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_23 = slice_by_index(begin = x1_23_begin_0, end = x1_23_end_0, end_mask = x1_23_end_mask_0, x = var_4004_cast_fp16)[name = string("x1_23")]; tensor x2_23_begin_0 = const()[name = string("x2_23_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_23_end_0 = const()[name = string("x2_23_end_0"), val = tensor([1, 8, 1, 128])]; tensor x2_23_end_mask_0 = const()[name = string("x2_23_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_23 = slice_by_index(begin = x2_23_begin_0, end = x2_23_end_0, end_mask = x2_23_end_mask_0, x = var_4004_cast_fp16)[name = string("x2_23")]; fp16 const_98_promoted = const()[name = string("const_98_promoted"), val = fp16(-0x1p+0)]; tensor var_4056 = mul(x = x2_23, y = const_98_promoted)[name = string("op_4056")]; int32 var_4058 = const()[name = string("op_4058"), val = int32(-1)]; bool var_4059_interleave_0 = const()[name = string("op_4059_interleave_0"), val = bool(false)]; tensor var_4059 = concat(axis = var_4058, interleave = var_4059_interleave_0, values = (var_4056, x1_23))[name = string("op_4059")]; tensor var_4060 = mul(x = var_4059, y = sin_1_cast_fp16)[name = string("op_4060")]; tensor key_states_21 = add(x = var_4035, y = var_4060)[name = string("key_states_21")]; tensor expand_dims_60 = const()[name = string("expand_dims_60"), val = tensor([5])]; tensor expand_dims_61 = const()[name = string("expand_dims_61"), val = tensor([0])]; tensor expand_dims_63 = const()[name = string("expand_dims_63"), val = tensor([0])]; tensor expand_dims_64 = const()[name = string("expand_dims_64"), val = tensor([6])]; int32 concat_42_axis_0 = const()[name = string("concat_42_axis_0"), val = int32(0)]; bool concat_42_interleave_0 = const()[name = string("concat_42_interleave_0"), val = bool(false)]; tensor concat_42 = concat(axis = concat_42_axis_0, interleave = concat_42_interleave_0, values = (expand_dims_60, expand_dims_61, current_pos, expand_dims_63))[name = string("concat_42")]; tensor concat_43_values1_0 = const()[name = string("concat_43_values1_0"), val = tensor([0])]; tensor concat_43_values3_0 = const()[name = string("concat_43_values3_0"), val = tensor([0])]; int32 concat_43_axis_0 = const()[name = string("concat_43_axis_0"), val = int32(0)]; bool concat_43_interleave_0 = const()[name = string("concat_43_interleave_0"), val = bool(false)]; tensor concat_43 = concat(axis = concat_43_axis_0, interleave = concat_43_interleave_0, values = (expand_dims_64, concat_43_values1_0, var_1725, concat_43_values3_0))[name = string("concat_43")]; tensor model_model_kv_cache_0_internal_tensor_assign_11_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_11_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_11_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_11_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_11_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_11_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_11_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_11_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_11_cast_fp16 = slice_update(begin = concat_42, begin_mask = model_model_kv_cache_0_internal_tensor_assign_11_begin_mask_0, end = concat_43, end_mask = model_model_kv_cache_0_internal_tensor_assign_11_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_11_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_11_stride_0, update = key_states_21, x = coreml_update_state_65)[name = string("model_model_kv_cache_0_internal_tensor_assign_11_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_11_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_10_write_state")]; tensor coreml_update_state_66 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_10")]; tensor expand_dims_66 = const()[name = string("expand_dims_66"), val = tensor([33])]; tensor expand_dims_67 = const()[name = string("expand_dims_67"), val = tensor([0])]; tensor expand_dims_69 = const()[name = string("expand_dims_69"), val = tensor([0])]; tensor expand_dims_70 = const()[name = string("expand_dims_70"), val = tensor([34])]; int32 concat_46_axis_0 = const()[name = string("concat_46_axis_0"), val = int32(0)]; bool concat_46_interleave_0 = const()[name = string("concat_46_interleave_0"), val = bool(false)]; tensor concat_46 = concat(axis = concat_46_axis_0, interleave = concat_46_interleave_0, values = (expand_dims_66, expand_dims_67, current_pos, expand_dims_69))[name = string("concat_46")]; tensor concat_47_values1_0 = const()[name = string("concat_47_values1_0"), val = tensor([0])]; tensor concat_47_values3_0 = const()[name = string("concat_47_values3_0"), val = tensor([0])]; int32 concat_47_axis_0 = const()[name = string("concat_47_axis_0"), val = int32(0)]; bool concat_47_interleave_0 = const()[name = string("concat_47_interleave_0"), val = bool(false)]; tensor concat_47 = concat(axis = concat_47_axis_0, interleave = concat_47_interleave_0, values = (expand_dims_70, concat_47_values1_0, var_1725, concat_47_values3_0))[name = string("concat_47")]; tensor model_model_kv_cache_0_internal_tensor_assign_12_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_12_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_12_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_12_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_12_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_12_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_12_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_12_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_12_cast_fp16 = slice_update(begin = concat_46, begin_mask = model_model_kv_cache_0_internal_tensor_assign_12_begin_mask_0, end = concat_47, end_mask = model_model_kv_cache_0_internal_tensor_assign_12_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_12_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_12_stride_0, update = var_3965, x = coreml_update_state_66)[name = string("model_model_kv_cache_0_internal_tensor_assign_12_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_12_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_11_write_state")]; tensor coreml_update_state_67 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_11")]; tensor var_4115_begin_0 = const()[name = string("op_4115_begin_0"), val = tensor([5, 0, 0, 0])]; tensor var_4115_end_0 = const()[name = string("op_4115_end_0"), val = tensor([6, 8, 1024, 128])]; tensor var_4115_end_mask_0 = const()[name = string("op_4115_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_4115_cast_fp16 = slice_by_index(begin = var_4115_begin_0, end = var_4115_end_0, end_mask = var_4115_end_mask_0, x = coreml_update_state_67)[name = string("op_4115_cast_fp16")]; tensor K_layer_cache_11_axes_0 = const()[name = string("K_layer_cache_11_axes_0"), val = tensor([0])]; tensor K_layer_cache_11_cast_fp16 = squeeze(axes = K_layer_cache_11_axes_0, x = var_4115_cast_fp16)[name = string("K_layer_cache_11_cast_fp16")]; tensor var_4122_begin_0 = const()[name = string("op_4122_begin_0"), val = tensor([33, 0, 0, 0])]; tensor var_4122_end_0 = const()[name = string("op_4122_end_0"), val = tensor([34, 8, 1024, 128])]; tensor var_4122_end_mask_0 = const()[name = string("op_4122_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_4122_cast_fp16 = slice_by_index(begin = var_4122_begin_0, end = var_4122_end_0, end_mask = var_4122_end_mask_0, x = coreml_update_state_67)[name = string("op_4122_cast_fp16")]; tensor V_layer_cache_11_axes_0 = const()[name = string("V_layer_cache_11_axes_0"), val = tensor([0])]; tensor V_layer_cache_11_cast_fp16 = squeeze(axes = V_layer_cache_11_axes_0, x = var_4122_cast_fp16)[name = string("V_layer_cache_11_cast_fp16")]; tensor x_107_axes_0 = const()[name = string("x_107_axes_0"), val = tensor([1])]; tensor x_107_cast_fp16 = expand_dims(axes = x_107_axes_0, x = K_layer_cache_11_cast_fp16)[name = string("x_107_cast_fp16")]; tensor var_4159 = const()[name = string("op_4159"), val = tensor([1, 2, 1, 1])]; tensor x_109_cast_fp16 = tile(reps = var_4159, x = x_107_cast_fp16)[name = string("x_109_cast_fp16")]; tensor var_4171 = const()[name = string("op_4171"), val = tensor([1, -1, 1024, 128])]; tensor key_states_23_cast_fp16 = reshape(shape = var_4171, x = x_109_cast_fp16)[name = string("key_states_23_cast_fp16")]; tensor x_113_axes_0 = const()[name = string("x_113_axes_0"), val = tensor([1])]; tensor x_113_cast_fp16 = expand_dims(axes = x_113_axes_0, x = V_layer_cache_11_cast_fp16)[name = string("x_113_cast_fp16")]; tensor var_4179 = const()[name = string("op_4179"), val = tensor([1, 2, 1, 1])]; tensor x_115_cast_fp16 = tile(reps = var_4179, x = x_113_cast_fp16)[name = string("x_115_cast_fp16")]; tensor var_4191 = const()[name = string("op_4191"), val = tensor([1, -1, 1024, 128])]; tensor value_states_33_cast_fp16 = reshape(shape = var_4191, x = x_115_cast_fp16)[name = string("value_states_33_cast_fp16")]; bool var_4206_transpose_x_1 = const()[name = string("op_4206_transpose_x_1"), val = bool(false)]; bool var_4206_transpose_y_1 = const()[name = string("op_4206_transpose_y_1"), val = bool(true)]; tensor var_4206 = matmul(transpose_x = var_4206_transpose_x_1, transpose_y = var_4206_transpose_y_1, x = query_states_21, y = key_states_23_cast_fp16)[name = string("op_4206")]; fp16 var_4207_to_fp16 = const()[name = string("op_4207_to_fp16"), val = fp16(0x1.6ap-4)]; tensor attn_weights_31_cast_fp16 = mul(x = var_4206, y = var_4207_to_fp16)[name = string("attn_weights_31_cast_fp16")]; tensor attn_weights_33_cast_fp16 = add(x = attn_weights_31_cast_fp16, y = causal_mask)[name = string("attn_weights_33_cast_fp16")]; int32 var_4242 = const()[name = string("op_4242"), val = int32(-1)]; tensor attn_weights_35_cast_fp16 = softmax(axis = var_4242, x = attn_weights_33_cast_fp16)[name = string("attn_weights_35_cast_fp16")]; bool attn_output_51_transpose_x_0 = const()[name = string("attn_output_51_transpose_x_0"), val = bool(false)]; bool attn_output_51_transpose_y_0 = const()[name = string("attn_output_51_transpose_y_0"), val = bool(false)]; tensor attn_output_51_cast_fp16 = matmul(transpose_x = attn_output_51_transpose_x_0, transpose_y = attn_output_51_transpose_y_0, x = attn_weights_35_cast_fp16, y = value_states_33_cast_fp16)[name = string("attn_output_51_cast_fp16")]; tensor var_4253_perm_0 = const()[name = string("op_4253_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_4257 = const()[name = string("op_4257"), val = tensor([1, 1, 2048])]; tensor var_4253_cast_fp16 = transpose(perm = var_4253_perm_0, x = attn_output_51_cast_fp16)[name = string("transpose_136")]; tensor attn_output_55_cast_fp16 = reshape(shape = var_4257, x = var_4253_cast_fp16)[name = string("attn_output_55_cast_fp16")]; tensor var_4262 = const()[name = string("op_4262"), val = tensor([0, 2, 1])]; string var_4278_pad_type_0 = const()[name = string("op_4278_pad_type_0"), val = string("valid")]; int32 var_4278_groups_0 = const()[name = string("op_4278_groups_0"), val = int32(1)]; tensor var_4278_strides_0 = const()[name = string("op_4278_strides_0"), val = tensor([1])]; tensor var_4278_pad_0 = const()[name = string("op_4278_pad_0"), val = tensor([0, 0])]; tensor var_4278_dilations_0 = const()[name = string("op_4278_dilations_0"), val = tensor([1])]; tensor squeeze_5_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1026660608))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1029806400))))[name = string("squeeze_5_cast_fp16_to_fp32_to_fp16_palettized")]; tensor var_4263_cast_fp16 = transpose(perm = var_4262, x = attn_output_55_cast_fp16)[name = string("transpose_135")]; tensor var_4278_cast_fp16 = conv(dilations = var_4278_dilations_0, groups = var_4278_groups_0, pad = var_4278_pad_0, pad_type = var_4278_pad_type_0, strides = var_4278_strides_0, weight = squeeze_5_cast_fp16_to_fp32_to_fp16_palettized, x = var_4263_cast_fp16)[name = string("op_4278_cast_fp16")]; tensor var_4282 = const()[name = string("op_4282"), val = tensor([0, 2, 1])]; tensor attn_output_59_cast_fp16 = transpose(perm = var_4282, x = var_4278_cast_fp16)[name = string("transpose_134")]; tensor hidden_states_35_cast_fp16 = add(x = hidden_states_31_cast_fp16, y = attn_output_59_cast_fp16)[name = string("hidden_states_35_cast_fp16")]; tensor mean_47_axes_0 = const()[name = string("mean_47_axes_0"), val = tensor([-1])]; bool mean_47_keep_dims_0 = const()[name = string("mean_47_keep_dims_0"), val = bool(true)]; tensor mean_47_cast_fp16 = reduce_mean(axes = mean_47_axes_0, keep_dims = mean_47_keep_dims_0, x = hidden_states_35_cast_fp16)[name = string("mean_47_cast_fp16")]; tensor input_101_cast_fp16 = sub(x = hidden_states_35_cast_fp16, y = mean_47_cast_fp16)[name = string("input_101_cast_fp16")]; tensor var_4301_axes_0 = const()[name = string("op_4301_axes_0"), val = tensor([-1])]; tensor model_model_layers_5_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_5_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1029839232)))]; fp16 var_4289_to_fp16 = const()[name = string("op_4289_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_4301_cast_fp16 = layer_norm(axes = var_4301_axes_0, epsilon = var_4289_to_fp16, gamma = model_model_layers_5_post_attention_layernorm_weight_to_fp16, x = input_101_cast_fp16)[name = string("op_4301_cast_fp16")]; tensor var_4315 = const()[name = string("op_4315"), val = tensor([0, 2, 1])]; tensor input_103_axes_0 = const()[name = string("input_103_axes_0"), val = tensor([2])]; tensor var_4316 = transpose(perm = var_4315, x = var_4301_cast_fp16)[name = string("transpose_133")]; tensor input_103 = expand_dims(axes = input_103_axes_0, x = var_4316)[name = string("input_103")]; string input_105_pad_type_0 = const()[name = string("input_105_pad_type_0"), val = string("valid")]; tensor input_105_strides_0 = const()[name = string("input_105_strides_0"), val = tensor([1, 1])]; tensor input_105_pad_0 = const()[name = string("input_105_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_105_dilations_0 = const()[name = string("input_105_dilations_0"), val = tensor([1, 1])]; int32 input_105_groups_0 = const()[name = string("input_105_groups_0"), val = int32(1)]; tensor input_105 = conv(dilations = input_105_dilations_0, groups = input_105_groups_0, pad = input_105_pad_0, pad_type = input_105_pad_type_0, strides = input_105_strides_0, weight = model_model_layers_5_mlp_gate_proj_weight_palettized, x = input_103)[name = string("input_105")]; string b_11_pad_type_0 = const()[name = string("b_11_pad_type_0"), val = string("valid")]; tensor b_11_strides_0 = const()[name = string("b_11_strides_0"), val = tensor([1, 1])]; tensor b_11_pad_0 = const()[name = string("b_11_pad_0"), val = tensor([0, 0, 0, 0])]; tensor b_11_dilations_0 = const()[name = string("b_11_dilations_0"), val = tensor([1, 1])]; int32 b_11_groups_0 = const()[name = string("b_11_groups_0"), val = int32(1)]; tensor b_11 = conv(dilations = b_11_dilations_0, groups = b_11_groups_0, pad = b_11_pad_0, pad_type = b_11_pad_type_0, strides = b_11_strides_0, weight = model_model_layers_5_mlp_up_proj_weight_palettized, x = input_103)[name = string("b_11")]; tensor c_11 = silu(x = input_105)[name = string("c_11")]; tensor input_107 = mul(x = c_11, y = b_11)[name = string("input_107")]; string e_11_pad_type_0 = const()[name = string("e_11_pad_type_0"), val = string("valid")]; tensor e_11_strides_0 = const()[name = string("e_11_strides_0"), val = tensor([1, 1])]; tensor e_11_pad_0 = const()[name = string("e_11_pad_0"), val = tensor([0, 0, 0, 0])]; tensor e_11_dilations_0 = const()[name = string("e_11_dilations_0"), val = tensor([1, 1])]; int32 e_11_groups_0 = const()[name = string("e_11_groups_0"), val = int32(1)]; tensor e_11 = conv(dilations = e_11_dilations_0, groups = e_11_groups_0, pad = e_11_pad_0, pad_type = e_11_pad_type_0, strides = e_11_strides_0, weight = model_model_layers_5_mlp_down_proj_weight_palettized, x = input_107)[name = string("e_11")]; tensor var_4338_axes_0 = const()[name = string("op_4338_axes_0"), val = tensor([2])]; tensor var_4338 = squeeze(axes = var_4338_axes_0, x = e_11)[name = string("op_4338")]; tensor var_4339 = const()[name = string("op_4339"), val = tensor([0, 2, 1])]; tensor var_4340 = transpose(perm = var_4339, x = var_4338)[name = string("transpose_132")]; tensor hidden_states_37_cast_fp16 = add(x = hidden_states_35_cast_fp16, y = var_4340)[name = string("hidden_states_37_cast_fp16")]; tensor mean_49_axes_0 = const()[name = string("mean_49_axes_0"), val = tensor([-1])]; bool mean_49_keep_dims_0 = const()[name = string("mean_49_keep_dims_0"), val = bool(true)]; tensor mean_49_cast_fp16 = reduce_mean(axes = mean_49_axes_0, keep_dims = mean_49_keep_dims_0, x = hidden_states_37_cast_fp16)[name = string("mean_49_cast_fp16")]; tensor input_109_cast_fp16 = sub(x = hidden_states_37_cast_fp16, y = mean_49_cast_fp16)[name = string("input_109_cast_fp16")]; tensor var_4358_axes_0 = const()[name = string("op_4358_axes_0"), val = tensor([-1])]; tensor model_model_layers_6_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_6_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1029843392)))]; fp16 var_4346_to_fp16 = const()[name = string("op_4346_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_4358_cast_fp16 = layer_norm(axes = var_4358_axes_0, epsilon = var_4346_to_fp16, gamma = model_model_layers_6_input_layernorm_weight_to_fp16, x = input_109_cast_fp16)[name = string("op_4358_cast_fp16")]; tensor var_4364 = const()[name = string("op_4364"), val = tensor([0, 2, 1])]; tensor var_4367_axes_0 = const()[name = string("op_4367_axes_0"), val = tensor([2])]; tensor var_4365 = transpose(perm = var_4364, x = var_4358_cast_fp16)[name = string("transpose_131")]; tensor var_4367 = expand_dims(axes = var_4367_axes_0, x = var_4365)[name = string("op_4367")]; string var_4383_pad_type_0 = const()[name = string("op_4383_pad_type_0"), val = string("valid")]; tensor var_4383_strides_0 = const()[name = string("op_4383_strides_0"), val = tensor([1, 1])]; tensor var_4383_pad_0 = const()[name = string("op_4383_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_4383_dilations_0 = const()[name = string("op_4383_dilations_0"), val = tensor([1, 1])]; int32 var_4383_groups_0 = const()[name = string("op_4383_groups_0"), val = int32(1)]; tensor var_4383 = conv(dilations = var_4383_dilations_0, groups = var_4383_groups_0, pad = var_4383_pad_0, pad_type = var_4383_pad_type_0, strides = var_4383_strides_0, weight = model_model_layers_6_self_attn_q_proj_weight_palettized, x = var_4367)[name = string("op_4383")]; tensor var_4388 = const()[name = string("op_4388"), val = tensor([1, 16, 1, 128])]; tensor var_4389 = reshape(shape = var_4388, x = var_4383)[name = string("op_4389")]; string var_4405_pad_type_0 = const()[name = string("op_4405_pad_type_0"), val = string("valid")]; tensor var_4405_strides_0 = const()[name = string("op_4405_strides_0"), val = tensor([1, 1])]; tensor var_4405_pad_0 = const()[name = string("op_4405_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_4405_dilations_0 = const()[name = string("op_4405_dilations_0"), val = tensor([1, 1])]; int32 var_4405_groups_0 = const()[name = string("op_4405_groups_0"), val = int32(1)]; tensor var_4405 = conv(dilations = var_4405_dilations_0, groups = var_4405_groups_0, pad = var_4405_pad_0, pad_type = var_4405_pad_type_0, strides = var_4405_strides_0, weight = model_model_layers_6_self_attn_k_proj_weight_palettized, x = var_4367)[name = string("op_4405")]; tensor var_4410 = const()[name = string("op_4410"), val = tensor([1, 8, 1, 128])]; tensor var_4411 = reshape(shape = var_4410, x = var_4405)[name = string("op_4411")]; string var_4427_pad_type_0 = const()[name = string("op_4427_pad_type_0"), val = string("valid")]; tensor var_4427_strides_0 = const()[name = string("op_4427_strides_0"), val = tensor([1, 1])]; tensor var_4427_pad_0 = const()[name = string("op_4427_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_4427_dilations_0 = const()[name = string("op_4427_dilations_0"), val = tensor([1, 1])]; int32 var_4427_groups_0 = const()[name = string("op_4427_groups_0"), val = int32(1)]; tensor var_4427 = conv(dilations = var_4427_dilations_0, groups = var_4427_groups_0, pad = var_4427_pad_0, pad_type = var_4427_pad_type_0, strides = var_4427_strides_0, weight = model_model_layers_6_self_attn_v_proj_weight_palettized, x = var_4367)[name = string("op_4427")]; tensor var_4432 = const()[name = string("op_4432"), val = tensor([1, 8, 1, 128])]; tensor var_4433 = reshape(shape = var_4432, x = var_4427)[name = string("op_4433")]; tensor mean_51_axes_0 = const()[name = string("mean_51_axes_0"), val = tensor([-1])]; bool mean_51_keep_dims_0 = const()[name = string("mean_51_keep_dims_0"), val = bool(true)]; tensor mean_51 = reduce_mean(axes = mean_51_axes_0, keep_dims = mean_51_keep_dims_0, x = var_4389)[name = string("mean_51")]; tensor input_113 = sub(x = var_4389, y = mean_51)[name = string("input_113")]; tensor var_4454_axes_0 = const()[name = string("op_4454_axes_0"), val = tensor([-1])]; tensor model_model_layers_6_self_attn_q_norm_weight_to_fp16 = const()[name = string("model_model_layers_6_self_attn_q_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1029847552)))]; fp16 var_4442_to_fp16 = const()[name = string("op_4442_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_4454_cast_fp16 = layer_norm(axes = var_4454_axes_0, epsilon = var_4442_to_fp16, gamma = model_model_layers_6_self_attn_q_norm_weight_to_fp16, x = input_113)[name = string("op_4454_cast_fp16")]; tensor mean_53_axes_0 = const()[name = string("mean_53_axes_0"), val = tensor([-1])]; bool mean_53_keep_dims_0 = const()[name = string("mean_53_keep_dims_0"), val = bool(true)]; tensor mean_53 = reduce_mean(axes = mean_53_axes_0, keep_dims = mean_53_keep_dims_0, x = var_4411)[name = string("mean_53")]; tensor input_115 = sub(x = var_4411, y = mean_53)[name = string("input_115")]; tensor var_4472_axes_0 = const()[name = string("op_4472_axes_0"), val = tensor([-1])]; tensor model_model_layers_6_self_attn_k_norm_weight_to_fp16 = const()[name = string("model_model_layers_6_self_attn_k_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1029847872)))]; fp16 var_4460_to_fp16 = const()[name = string("op_4460_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_4472_cast_fp16 = layer_norm(axes = var_4472_axes_0, epsilon = var_4460_to_fp16, gamma = model_model_layers_6_self_attn_k_norm_weight_to_fp16, x = input_115)[name = string("op_4472_cast_fp16")]; tensor var_4475 = mul(x = var_4454_cast_fp16, y = cos_1_cast_fp16)[name = string("op_4475")]; tensor x1_25_begin_0 = const()[name = string("x1_25_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_25_end_0 = const()[name = string("x1_25_end_0"), val = tensor([1, 16, 1, 64])]; tensor x1_25_end_mask_0 = const()[name = string("x1_25_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_25 = slice_by_index(begin = x1_25_begin_0, end = x1_25_end_0, end_mask = x1_25_end_mask_0, x = var_4454_cast_fp16)[name = string("x1_25")]; tensor x2_25_begin_0 = const()[name = string("x2_25_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_25_end_0 = const()[name = string("x2_25_end_0"), val = tensor([1, 16, 1, 128])]; tensor x2_25_end_mask_0 = const()[name = string("x2_25_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_25 = slice_by_index(begin = x2_25_begin_0, end = x2_25_end_0, end_mask = x2_25_end_mask_0, x = var_4454_cast_fp16)[name = string("x2_25")]; fp16 const_113_promoted = const()[name = string("const_113_promoted"), val = fp16(-0x1p+0)]; tensor var_4496 = mul(x = x2_25, y = const_113_promoted)[name = string("op_4496")]; int32 var_4498 = const()[name = string("op_4498"), val = int32(-1)]; bool var_4499_interleave_0 = const()[name = string("op_4499_interleave_0"), val = bool(false)]; tensor var_4499 = concat(axis = var_4498, interleave = var_4499_interleave_0, values = (var_4496, x1_25))[name = string("op_4499")]; tensor var_4500 = mul(x = var_4499, y = sin_1_cast_fp16)[name = string("op_4500")]; tensor query_states_25 = add(x = var_4475, y = var_4500)[name = string("query_states_25")]; tensor var_4503 = mul(x = var_4472_cast_fp16, y = cos_1_cast_fp16)[name = string("op_4503")]; tensor x1_27_begin_0 = const()[name = string("x1_27_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_27_end_0 = const()[name = string("x1_27_end_0"), val = tensor([1, 8, 1, 64])]; tensor x1_27_end_mask_0 = const()[name = string("x1_27_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_27 = slice_by_index(begin = x1_27_begin_0, end = x1_27_end_0, end_mask = x1_27_end_mask_0, x = var_4472_cast_fp16)[name = string("x1_27")]; tensor x2_27_begin_0 = const()[name = string("x2_27_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_27_end_0 = const()[name = string("x2_27_end_0"), val = tensor([1, 8, 1, 128])]; tensor x2_27_end_mask_0 = const()[name = string("x2_27_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_27 = slice_by_index(begin = x2_27_begin_0, end = x2_27_end_0, end_mask = x2_27_end_mask_0, x = var_4472_cast_fp16)[name = string("x2_27")]; fp16 const_116_promoted = const()[name = string("const_116_promoted"), val = fp16(-0x1p+0)]; tensor var_4524 = mul(x = x2_27, y = const_116_promoted)[name = string("op_4524")]; int32 var_4526 = const()[name = string("op_4526"), val = int32(-1)]; bool var_4527_interleave_0 = const()[name = string("op_4527_interleave_0"), val = bool(false)]; tensor var_4527 = concat(axis = var_4526, interleave = var_4527_interleave_0, values = (var_4524, x1_27))[name = string("op_4527")]; tensor var_4528 = mul(x = var_4527, y = sin_1_cast_fp16)[name = string("op_4528")]; tensor key_states_25 = add(x = var_4503, y = var_4528)[name = string("key_states_25")]; tensor expand_dims_72 = const()[name = string("expand_dims_72"), val = tensor([6])]; tensor expand_dims_73 = const()[name = string("expand_dims_73"), val = tensor([0])]; tensor expand_dims_75 = const()[name = string("expand_dims_75"), val = tensor([0])]; tensor expand_dims_76 = const()[name = string("expand_dims_76"), val = tensor([7])]; int32 concat_50_axis_0 = const()[name = string("concat_50_axis_0"), val = int32(0)]; bool concat_50_interleave_0 = const()[name = string("concat_50_interleave_0"), val = bool(false)]; tensor concat_50 = concat(axis = concat_50_axis_0, interleave = concat_50_interleave_0, values = (expand_dims_72, expand_dims_73, current_pos, expand_dims_75))[name = string("concat_50")]; tensor concat_51_values1_0 = const()[name = string("concat_51_values1_0"), val = tensor([0])]; tensor concat_51_values3_0 = const()[name = string("concat_51_values3_0"), val = tensor([0])]; int32 concat_51_axis_0 = const()[name = string("concat_51_axis_0"), val = int32(0)]; bool concat_51_interleave_0 = const()[name = string("concat_51_interleave_0"), val = bool(false)]; tensor concat_51 = concat(axis = concat_51_axis_0, interleave = concat_51_interleave_0, values = (expand_dims_76, concat_51_values1_0, var_1725, concat_51_values3_0))[name = string("concat_51")]; tensor model_model_kv_cache_0_internal_tensor_assign_13_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_13_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_13_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_13_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_13_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_13_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_13_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_13_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_13_cast_fp16 = slice_update(begin = concat_50, begin_mask = model_model_kv_cache_0_internal_tensor_assign_13_begin_mask_0, end = concat_51, end_mask = model_model_kv_cache_0_internal_tensor_assign_13_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_13_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_13_stride_0, update = key_states_25, x = coreml_update_state_67)[name = string("model_model_kv_cache_0_internal_tensor_assign_13_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_13_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_12_write_state")]; tensor coreml_update_state_68 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_12")]; tensor expand_dims_78 = const()[name = string("expand_dims_78"), val = tensor([34])]; tensor expand_dims_79 = const()[name = string("expand_dims_79"), val = tensor([0])]; tensor expand_dims_81 = const()[name = string("expand_dims_81"), val = tensor([0])]; tensor expand_dims_82 = const()[name = string("expand_dims_82"), val = tensor([35])]; int32 concat_54_axis_0 = const()[name = string("concat_54_axis_0"), val = int32(0)]; bool concat_54_interleave_0 = const()[name = string("concat_54_interleave_0"), val = bool(false)]; tensor concat_54 = concat(axis = concat_54_axis_0, interleave = concat_54_interleave_0, values = (expand_dims_78, expand_dims_79, current_pos, expand_dims_81))[name = string("concat_54")]; tensor concat_55_values1_0 = const()[name = string("concat_55_values1_0"), val = tensor([0])]; tensor concat_55_values3_0 = const()[name = string("concat_55_values3_0"), val = tensor([0])]; int32 concat_55_axis_0 = const()[name = string("concat_55_axis_0"), val = int32(0)]; bool concat_55_interleave_0 = const()[name = string("concat_55_interleave_0"), val = bool(false)]; tensor concat_55 = concat(axis = concat_55_axis_0, interleave = concat_55_interleave_0, values = (expand_dims_82, concat_55_values1_0, var_1725, concat_55_values3_0))[name = string("concat_55")]; tensor model_model_kv_cache_0_internal_tensor_assign_14_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_14_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_14_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_14_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_14_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_14_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_14_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_14_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_14_cast_fp16 = slice_update(begin = concat_54, begin_mask = model_model_kv_cache_0_internal_tensor_assign_14_begin_mask_0, end = concat_55, end_mask = model_model_kv_cache_0_internal_tensor_assign_14_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_14_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_14_stride_0, update = var_4433, x = coreml_update_state_68)[name = string("model_model_kv_cache_0_internal_tensor_assign_14_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_14_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_13_write_state")]; tensor coreml_update_state_69 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_13")]; tensor var_4583_begin_0 = const()[name = string("op_4583_begin_0"), val = tensor([6, 0, 0, 0])]; tensor var_4583_end_0 = const()[name = string("op_4583_end_0"), val = tensor([7, 8, 1024, 128])]; tensor var_4583_end_mask_0 = const()[name = string("op_4583_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_4583_cast_fp16 = slice_by_index(begin = var_4583_begin_0, end = var_4583_end_0, end_mask = var_4583_end_mask_0, x = coreml_update_state_69)[name = string("op_4583_cast_fp16")]; tensor K_layer_cache_13_axes_0 = const()[name = string("K_layer_cache_13_axes_0"), val = tensor([0])]; tensor K_layer_cache_13_cast_fp16 = squeeze(axes = K_layer_cache_13_axes_0, x = var_4583_cast_fp16)[name = string("K_layer_cache_13_cast_fp16")]; tensor var_4590_begin_0 = const()[name = string("op_4590_begin_0"), val = tensor([34, 0, 0, 0])]; tensor var_4590_end_0 = const()[name = string("op_4590_end_0"), val = tensor([35, 8, 1024, 128])]; tensor var_4590_end_mask_0 = const()[name = string("op_4590_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_4590_cast_fp16 = slice_by_index(begin = var_4590_begin_0, end = var_4590_end_0, end_mask = var_4590_end_mask_0, x = coreml_update_state_69)[name = string("op_4590_cast_fp16")]; tensor V_layer_cache_13_axes_0 = const()[name = string("V_layer_cache_13_axes_0"), val = tensor([0])]; tensor V_layer_cache_13_cast_fp16 = squeeze(axes = V_layer_cache_13_axes_0, x = var_4590_cast_fp16)[name = string("V_layer_cache_13_cast_fp16")]; tensor x_127_axes_0 = const()[name = string("x_127_axes_0"), val = tensor([1])]; tensor x_127_cast_fp16 = expand_dims(axes = x_127_axes_0, x = K_layer_cache_13_cast_fp16)[name = string("x_127_cast_fp16")]; tensor var_4627 = const()[name = string("op_4627"), val = tensor([1, 2, 1, 1])]; tensor x_129_cast_fp16 = tile(reps = var_4627, x = x_127_cast_fp16)[name = string("x_129_cast_fp16")]; tensor var_4639 = const()[name = string("op_4639"), val = tensor([1, -1, 1024, 128])]; tensor key_states_27_cast_fp16 = reshape(shape = var_4639, x = x_129_cast_fp16)[name = string("key_states_27_cast_fp16")]; tensor x_133_axes_0 = const()[name = string("x_133_axes_0"), val = tensor([1])]; tensor x_133_cast_fp16 = expand_dims(axes = x_133_axes_0, x = V_layer_cache_13_cast_fp16)[name = string("x_133_cast_fp16")]; tensor var_4647 = const()[name = string("op_4647"), val = tensor([1, 2, 1, 1])]; tensor x_135_cast_fp16 = tile(reps = var_4647, x = x_133_cast_fp16)[name = string("x_135_cast_fp16")]; tensor var_4659 = const()[name = string("op_4659"), val = tensor([1, -1, 1024, 128])]; tensor value_states_39_cast_fp16 = reshape(shape = var_4659, x = x_135_cast_fp16)[name = string("value_states_39_cast_fp16")]; bool var_4674_transpose_x_1 = const()[name = string("op_4674_transpose_x_1"), val = bool(false)]; bool var_4674_transpose_y_1 = const()[name = string("op_4674_transpose_y_1"), val = bool(true)]; tensor var_4674 = matmul(transpose_x = var_4674_transpose_x_1, transpose_y = var_4674_transpose_y_1, x = query_states_25, y = key_states_27_cast_fp16)[name = string("op_4674")]; fp16 var_4675_to_fp16 = const()[name = string("op_4675_to_fp16"), val = fp16(0x1.6ap-4)]; tensor attn_weights_37_cast_fp16 = mul(x = var_4674, y = var_4675_to_fp16)[name = string("attn_weights_37_cast_fp16")]; tensor attn_weights_39_cast_fp16 = add(x = attn_weights_37_cast_fp16, y = causal_mask)[name = string("attn_weights_39_cast_fp16")]; int32 var_4710 = const()[name = string("op_4710"), val = int32(-1)]; tensor attn_weights_41_cast_fp16 = softmax(axis = var_4710, x = attn_weights_39_cast_fp16)[name = string("attn_weights_41_cast_fp16")]; bool attn_output_61_transpose_x_0 = const()[name = string("attn_output_61_transpose_x_0"), val = bool(false)]; bool attn_output_61_transpose_y_0 = const()[name = string("attn_output_61_transpose_y_0"), val = bool(false)]; tensor attn_output_61_cast_fp16 = matmul(transpose_x = attn_output_61_transpose_x_0, transpose_y = attn_output_61_transpose_y_0, x = attn_weights_41_cast_fp16, y = value_states_39_cast_fp16)[name = string("attn_output_61_cast_fp16")]; tensor var_4721_perm_0 = const()[name = string("op_4721_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_4725 = const()[name = string("op_4725"), val = tensor([1, 1, 2048])]; tensor var_4721_cast_fp16 = transpose(perm = var_4721_perm_0, x = attn_output_61_cast_fp16)[name = string("transpose_130")]; tensor attn_output_65_cast_fp16 = reshape(shape = var_4725, x = var_4721_cast_fp16)[name = string("attn_output_65_cast_fp16")]; tensor var_4730 = const()[name = string("op_4730"), val = tensor([0, 2, 1])]; string var_4746_pad_type_0 = const()[name = string("op_4746_pad_type_0"), val = string("valid")]; int32 var_4746_groups_0 = const()[name = string("op_4746_groups_0"), val = int32(1)]; tensor var_4746_strides_0 = const()[name = string("op_4746_strides_0"), val = tensor([1])]; tensor var_4746_pad_0 = const()[name = string("op_4746_pad_0"), val = tensor([0, 0])]; tensor var_4746_dilations_0 = const()[name = string("op_4746_dilations_0"), val = tensor([1])]; tensor squeeze_6_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1029848192))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1032993984))))[name = string("squeeze_6_cast_fp16_to_fp32_to_fp16_palettized")]; tensor var_4731_cast_fp16 = transpose(perm = var_4730, x = attn_output_65_cast_fp16)[name = string("transpose_129")]; tensor var_4746_cast_fp16 = conv(dilations = var_4746_dilations_0, groups = var_4746_groups_0, pad = var_4746_pad_0, pad_type = var_4746_pad_type_0, strides = var_4746_strides_0, weight = squeeze_6_cast_fp16_to_fp32_to_fp16_palettized, x = var_4731_cast_fp16)[name = string("op_4746_cast_fp16")]; tensor var_4750 = const()[name = string("op_4750"), val = tensor([0, 2, 1])]; tensor attn_output_69_cast_fp16 = transpose(perm = var_4750, x = var_4746_cast_fp16)[name = string("transpose_128")]; tensor hidden_states_41_cast_fp16 = add(x = hidden_states_37_cast_fp16, y = attn_output_69_cast_fp16)[name = string("hidden_states_41_cast_fp16")]; tensor mean_55_axes_0 = const()[name = string("mean_55_axes_0"), val = tensor([-1])]; bool mean_55_keep_dims_0 = const()[name = string("mean_55_keep_dims_0"), val = bool(true)]; tensor mean_55_cast_fp16 = reduce_mean(axes = mean_55_axes_0, keep_dims = mean_55_keep_dims_0, x = hidden_states_41_cast_fp16)[name = string("mean_55_cast_fp16")]; tensor input_119_cast_fp16 = sub(x = hidden_states_41_cast_fp16, y = mean_55_cast_fp16)[name = string("input_119_cast_fp16")]; tensor var_4769_axes_0 = const()[name = string("op_4769_axes_0"), val = tensor([-1])]; tensor model_model_layers_6_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_6_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1033026816)))]; fp16 var_4757_to_fp16 = const()[name = string("op_4757_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_4769_cast_fp16 = layer_norm(axes = var_4769_axes_0, epsilon = var_4757_to_fp16, gamma = model_model_layers_6_post_attention_layernorm_weight_to_fp16, x = input_119_cast_fp16)[name = string("op_4769_cast_fp16")]; tensor var_4783 = const()[name = string("op_4783"), val = tensor([0, 2, 1])]; tensor input_121_axes_0 = const()[name = string("input_121_axes_0"), val = tensor([2])]; tensor var_4784 = transpose(perm = var_4783, x = var_4769_cast_fp16)[name = string("transpose_127")]; tensor input_121 = expand_dims(axes = input_121_axes_0, x = var_4784)[name = string("input_121")]; string input_123_pad_type_0 = const()[name = string("input_123_pad_type_0"), val = string("valid")]; tensor input_123_strides_0 = const()[name = string("input_123_strides_0"), val = tensor([1, 1])]; tensor input_123_pad_0 = const()[name = string("input_123_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_123_dilations_0 = const()[name = string("input_123_dilations_0"), val = tensor([1, 1])]; int32 input_123_groups_0 = const()[name = string("input_123_groups_0"), val = int32(1)]; tensor input_123 = conv(dilations = input_123_dilations_0, groups = input_123_groups_0, pad = input_123_pad_0, pad_type = input_123_pad_type_0, strides = input_123_strides_0, weight = model_model_layers_6_mlp_gate_proj_weight_palettized, x = input_121)[name = string("input_123")]; string b_13_pad_type_0 = const()[name = string("b_13_pad_type_0"), val = string("valid")]; tensor b_13_strides_0 = const()[name = string("b_13_strides_0"), val = tensor([1, 1])]; tensor b_13_pad_0 = const()[name = string("b_13_pad_0"), val = tensor([0, 0, 0, 0])]; tensor b_13_dilations_0 = const()[name = string("b_13_dilations_0"), val = tensor([1, 1])]; int32 b_13_groups_0 = const()[name = string("b_13_groups_0"), val = int32(1)]; tensor b_13 = conv(dilations = b_13_dilations_0, groups = b_13_groups_0, pad = b_13_pad_0, pad_type = b_13_pad_type_0, strides = b_13_strides_0, weight = model_model_layers_6_mlp_up_proj_weight_palettized, x = input_121)[name = string("b_13")]; tensor c_13 = silu(x = input_123)[name = string("c_13")]; tensor input_125 = mul(x = c_13, y = b_13)[name = string("input_125")]; string e_13_pad_type_0 = const()[name = string("e_13_pad_type_0"), val = string("valid")]; tensor e_13_strides_0 = const()[name = string("e_13_strides_0"), val = tensor([1, 1])]; tensor e_13_pad_0 = const()[name = string("e_13_pad_0"), val = tensor([0, 0, 0, 0])]; tensor e_13_dilations_0 = const()[name = string("e_13_dilations_0"), val = tensor([1, 1])]; int32 e_13_groups_0 = const()[name = string("e_13_groups_0"), val = int32(1)]; tensor e_13 = conv(dilations = e_13_dilations_0, groups = e_13_groups_0, pad = e_13_pad_0, pad_type = e_13_pad_type_0, strides = e_13_strides_0, weight = model_model_layers_6_mlp_down_proj_weight_palettized, x = input_125)[name = string("e_13")]; tensor var_4806_axes_0 = const()[name = string("op_4806_axes_0"), val = tensor([2])]; tensor var_4806 = squeeze(axes = var_4806_axes_0, x = e_13)[name = string("op_4806")]; tensor var_4807 = const()[name = string("op_4807"), val = tensor([0, 2, 1])]; tensor var_4808 = transpose(perm = var_4807, x = var_4806)[name = string("transpose_126")]; tensor hidden_states_43_cast_fp16 = add(x = hidden_states_41_cast_fp16, y = var_4808)[name = string("hidden_states_43_cast_fp16")]; tensor mean_57_axes_0 = const()[name = string("mean_57_axes_0"), val = tensor([-1])]; bool mean_57_keep_dims_0 = const()[name = string("mean_57_keep_dims_0"), val = bool(true)]; tensor mean_57_cast_fp16 = reduce_mean(axes = mean_57_axes_0, keep_dims = mean_57_keep_dims_0, x = hidden_states_43_cast_fp16)[name = string("mean_57_cast_fp16")]; tensor input_127_cast_fp16 = sub(x = hidden_states_43_cast_fp16, y = mean_57_cast_fp16)[name = string("input_127_cast_fp16")]; tensor var_4826_axes_0 = const()[name = string("op_4826_axes_0"), val = tensor([-1])]; tensor model_model_layers_7_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_7_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1033030976)))]; fp16 var_4814_to_fp16 = const()[name = string("op_4814_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_4826_cast_fp16 = layer_norm(axes = var_4826_axes_0, epsilon = var_4814_to_fp16, gamma = model_model_layers_7_input_layernorm_weight_to_fp16, x = input_127_cast_fp16)[name = string("op_4826_cast_fp16")]; tensor var_4832 = const()[name = string("op_4832"), val = tensor([0, 2, 1])]; tensor var_4835_axes_0 = const()[name = string("op_4835_axes_0"), val = tensor([2])]; tensor var_4833 = transpose(perm = var_4832, x = var_4826_cast_fp16)[name = string("transpose_125")]; tensor var_4835 = expand_dims(axes = var_4835_axes_0, x = var_4833)[name = string("op_4835")]; string var_4851_pad_type_0 = const()[name = string("op_4851_pad_type_0"), val = string("valid")]; tensor var_4851_strides_0 = const()[name = string("op_4851_strides_0"), val = tensor([1, 1])]; tensor var_4851_pad_0 = const()[name = string("op_4851_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_4851_dilations_0 = const()[name = string("op_4851_dilations_0"), val = tensor([1, 1])]; int32 var_4851_groups_0 = const()[name = string("op_4851_groups_0"), val = int32(1)]; tensor var_4851 = conv(dilations = var_4851_dilations_0, groups = var_4851_groups_0, pad = var_4851_pad_0, pad_type = var_4851_pad_type_0, strides = var_4851_strides_0, weight = model_model_layers_7_self_attn_q_proj_weight_palettized, x = var_4835)[name = string("op_4851")]; tensor var_4856 = const()[name = string("op_4856"), val = tensor([1, 16, 1, 128])]; tensor var_4857 = reshape(shape = var_4856, x = var_4851)[name = string("op_4857")]; string var_4873_pad_type_0 = const()[name = string("op_4873_pad_type_0"), val = string("valid")]; tensor var_4873_strides_0 = const()[name = string("op_4873_strides_0"), val = tensor([1, 1])]; tensor var_4873_pad_0 = const()[name = string("op_4873_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_4873_dilations_0 = const()[name = string("op_4873_dilations_0"), val = tensor([1, 1])]; int32 var_4873_groups_0 = const()[name = string("op_4873_groups_0"), val = int32(1)]; tensor var_4873 = conv(dilations = var_4873_dilations_0, groups = var_4873_groups_0, pad = var_4873_pad_0, pad_type = var_4873_pad_type_0, strides = var_4873_strides_0, weight = model_model_layers_7_self_attn_k_proj_weight_palettized, x = var_4835)[name = string("op_4873")]; tensor var_4878 = const()[name = string("op_4878"), val = tensor([1, 8, 1, 128])]; tensor var_4879 = reshape(shape = var_4878, x = var_4873)[name = string("op_4879")]; string var_4895_pad_type_0 = const()[name = string("op_4895_pad_type_0"), val = string("valid")]; tensor var_4895_strides_0 = const()[name = string("op_4895_strides_0"), val = tensor([1, 1])]; tensor var_4895_pad_0 = const()[name = string("op_4895_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_4895_dilations_0 = const()[name = string("op_4895_dilations_0"), val = tensor([1, 1])]; int32 var_4895_groups_0 = const()[name = string("op_4895_groups_0"), val = int32(1)]; tensor var_4895 = conv(dilations = var_4895_dilations_0, groups = var_4895_groups_0, pad = var_4895_pad_0, pad_type = var_4895_pad_type_0, strides = var_4895_strides_0, weight = model_model_layers_7_self_attn_v_proj_weight_palettized, x = var_4835)[name = string("op_4895")]; tensor var_4900 = const()[name = string("op_4900"), val = tensor([1, 8, 1, 128])]; tensor var_4901 = reshape(shape = var_4900, x = var_4895)[name = string("op_4901")]; tensor mean_59_axes_0 = const()[name = string("mean_59_axes_0"), val = tensor([-1])]; bool mean_59_keep_dims_0 = const()[name = string("mean_59_keep_dims_0"), val = bool(true)]; tensor mean_59 = reduce_mean(axes = mean_59_axes_0, keep_dims = mean_59_keep_dims_0, x = var_4857)[name = string("mean_59")]; tensor input_131 = sub(x = var_4857, y = mean_59)[name = string("input_131")]; tensor var_4922_axes_0 = const()[name = string("op_4922_axes_0"), val = tensor([-1])]; tensor model_model_layers_7_self_attn_q_norm_weight_to_fp16 = const()[name = string("model_model_layers_7_self_attn_q_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1033035136)))]; fp16 var_4910_to_fp16 = const()[name = string("op_4910_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_4922_cast_fp16 = layer_norm(axes = var_4922_axes_0, epsilon = var_4910_to_fp16, gamma = model_model_layers_7_self_attn_q_norm_weight_to_fp16, x = input_131)[name = string("op_4922_cast_fp16")]; tensor mean_61_axes_0 = const()[name = string("mean_61_axes_0"), val = tensor([-1])]; bool mean_61_keep_dims_0 = const()[name = string("mean_61_keep_dims_0"), val = bool(true)]; tensor mean_61 = reduce_mean(axes = mean_61_axes_0, keep_dims = mean_61_keep_dims_0, x = var_4879)[name = string("mean_61")]; tensor input_133 = sub(x = var_4879, y = mean_61)[name = string("input_133")]; tensor var_4940_axes_0 = const()[name = string("op_4940_axes_0"), val = tensor([-1])]; tensor model_model_layers_7_self_attn_k_norm_weight_to_fp16 = const()[name = string("model_model_layers_7_self_attn_k_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1033035456)))]; fp16 var_4928_to_fp16 = const()[name = string("op_4928_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_4940_cast_fp16 = layer_norm(axes = var_4940_axes_0, epsilon = var_4928_to_fp16, gamma = model_model_layers_7_self_attn_k_norm_weight_to_fp16, x = input_133)[name = string("op_4940_cast_fp16")]; tensor var_4943 = mul(x = var_4922_cast_fp16, y = cos_1_cast_fp16)[name = string("op_4943")]; tensor x1_29_begin_0 = const()[name = string("x1_29_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_29_end_0 = const()[name = string("x1_29_end_0"), val = tensor([1, 16, 1, 64])]; tensor x1_29_end_mask_0 = const()[name = string("x1_29_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_29 = slice_by_index(begin = x1_29_begin_0, end = x1_29_end_0, end_mask = x1_29_end_mask_0, x = var_4922_cast_fp16)[name = string("x1_29")]; tensor x2_29_begin_0 = const()[name = string("x2_29_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_29_end_0 = const()[name = string("x2_29_end_0"), val = tensor([1, 16, 1, 128])]; tensor x2_29_end_mask_0 = const()[name = string("x2_29_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_29 = slice_by_index(begin = x2_29_begin_0, end = x2_29_end_0, end_mask = x2_29_end_mask_0, x = var_4922_cast_fp16)[name = string("x2_29")]; fp16 const_131_promoted = const()[name = string("const_131_promoted"), val = fp16(-0x1p+0)]; tensor var_4964 = mul(x = x2_29, y = const_131_promoted)[name = string("op_4964")]; int32 var_4966 = const()[name = string("op_4966"), val = int32(-1)]; bool var_4967_interleave_0 = const()[name = string("op_4967_interleave_0"), val = bool(false)]; tensor var_4967 = concat(axis = var_4966, interleave = var_4967_interleave_0, values = (var_4964, x1_29))[name = string("op_4967")]; tensor var_4968 = mul(x = var_4967, y = sin_1_cast_fp16)[name = string("op_4968")]; tensor query_states_29 = add(x = var_4943, y = var_4968)[name = string("query_states_29")]; tensor var_4971 = mul(x = var_4940_cast_fp16, y = cos_1_cast_fp16)[name = string("op_4971")]; tensor x1_31_begin_0 = const()[name = string("x1_31_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_31_end_0 = const()[name = string("x1_31_end_0"), val = tensor([1, 8, 1, 64])]; tensor x1_31_end_mask_0 = const()[name = string("x1_31_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_31 = slice_by_index(begin = x1_31_begin_0, end = x1_31_end_0, end_mask = x1_31_end_mask_0, x = var_4940_cast_fp16)[name = string("x1_31")]; tensor x2_31_begin_0 = const()[name = string("x2_31_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_31_end_0 = const()[name = string("x2_31_end_0"), val = tensor([1, 8, 1, 128])]; tensor x2_31_end_mask_0 = const()[name = string("x2_31_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_31 = slice_by_index(begin = x2_31_begin_0, end = x2_31_end_0, end_mask = x2_31_end_mask_0, x = var_4940_cast_fp16)[name = string("x2_31")]; fp16 const_134_promoted = const()[name = string("const_134_promoted"), val = fp16(-0x1p+0)]; tensor var_4992 = mul(x = x2_31, y = const_134_promoted)[name = string("op_4992")]; int32 var_4994 = const()[name = string("op_4994"), val = int32(-1)]; bool var_4995_interleave_0 = const()[name = string("op_4995_interleave_0"), val = bool(false)]; tensor var_4995 = concat(axis = var_4994, interleave = var_4995_interleave_0, values = (var_4992, x1_31))[name = string("op_4995")]; tensor var_4996 = mul(x = var_4995, y = sin_1_cast_fp16)[name = string("op_4996")]; tensor key_states_29 = add(x = var_4971, y = var_4996)[name = string("key_states_29")]; tensor expand_dims_84 = const()[name = string("expand_dims_84"), val = tensor([7])]; tensor expand_dims_85 = const()[name = string("expand_dims_85"), val = tensor([0])]; tensor expand_dims_87 = const()[name = string("expand_dims_87"), val = tensor([0])]; tensor expand_dims_88 = const()[name = string("expand_dims_88"), val = tensor([8])]; int32 concat_58_axis_0 = const()[name = string("concat_58_axis_0"), val = int32(0)]; bool concat_58_interleave_0 = const()[name = string("concat_58_interleave_0"), val = bool(false)]; tensor concat_58 = concat(axis = concat_58_axis_0, interleave = concat_58_interleave_0, values = (expand_dims_84, expand_dims_85, current_pos, expand_dims_87))[name = string("concat_58")]; tensor concat_59_values1_0 = const()[name = string("concat_59_values1_0"), val = tensor([0])]; tensor concat_59_values3_0 = const()[name = string("concat_59_values3_0"), val = tensor([0])]; int32 concat_59_axis_0 = const()[name = string("concat_59_axis_0"), val = int32(0)]; bool concat_59_interleave_0 = const()[name = string("concat_59_interleave_0"), val = bool(false)]; tensor concat_59 = concat(axis = concat_59_axis_0, interleave = concat_59_interleave_0, values = (expand_dims_88, concat_59_values1_0, var_1725, concat_59_values3_0))[name = string("concat_59")]; tensor model_model_kv_cache_0_internal_tensor_assign_15_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_15_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_15_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_15_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_15_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_15_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_15_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_15_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_15_cast_fp16 = slice_update(begin = concat_58, begin_mask = model_model_kv_cache_0_internal_tensor_assign_15_begin_mask_0, end = concat_59, end_mask = model_model_kv_cache_0_internal_tensor_assign_15_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_15_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_15_stride_0, update = key_states_29, x = coreml_update_state_69)[name = string("model_model_kv_cache_0_internal_tensor_assign_15_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_15_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_14_write_state")]; tensor coreml_update_state_70 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_14")]; tensor expand_dims_90 = const()[name = string("expand_dims_90"), val = tensor([35])]; tensor expand_dims_91 = const()[name = string("expand_dims_91"), val = tensor([0])]; tensor expand_dims_93 = const()[name = string("expand_dims_93"), val = tensor([0])]; tensor expand_dims_94 = const()[name = string("expand_dims_94"), val = tensor([36])]; int32 concat_62_axis_0 = const()[name = string("concat_62_axis_0"), val = int32(0)]; bool concat_62_interleave_0 = const()[name = string("concat_62_interleave_0"), val = bool(false)]; tensor concat_62 = concat(axis = concat_62_axis_0, interleave = concat_62_interleave_0, values = (expand_dims_90, expand_dims_91, current_pos, expand_dims_93))[name = string("concat_62")]; tensor concat_63_values1_0 = const()[name = string("concat_63_values1_0"), val = tensor([0])]; tensor concat_63_values3_0 = const()[name = string("concat_63_values3_0"), val = tensor([0])]; int32 concat_63_axis_0 = const()[name = string("concat_63_axis_0"), val = int32(0)]; bool concat_63_interleave_0 = const()[name = string("concat_63_interleave_0"), val = bool(false)]; tensor concat_63 = concat(axis = concat_63_axis_0, interleave = concat_63_interleave_0, values = (expand_dims_94, concat_63_values1_0, var_1725, concat_63_values3_0))[name = string("concat_63")]; tensor model_model_kv_cache_0_internal_tensor_assign_16_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_16_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_16_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_16_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_16_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_16_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_16_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_16_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_16_cast_fp16 = slice_update(begin = concat_62, begin_mask = model_model_kv_cache_0_internal_tensor_assign_16_begin_mask_0, end = concat_63, end_mask = model_model_kv_cache_0_internal_tensor_assign_16_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_16_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_16_stride_0, update = var_4901, x = coreml_update_state_70)[name = string("model_model_kv_cache_0_internal_tensor_assign_16_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_16_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_15_write_state")]; tensor coreml_update_state_71 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_15")]; tensor var_5051_begin_0 = const()[name = string("op_5051_begin_0"), val = tensor([7, 0, 0, 0])]; tensor var_5051_end_0 = const()[name = string("op_5051_end_0"), val = tensor([8, 8, 1024, 128])]; tensor var_5051_end_mask_0 = const()[name = string("op_5051_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_5051_cast_fp16 = slice_by_index(begin = var_5051_begin_0, end = var_5051_end_0, end_mask = var_5051_end_mask_0, x = coreml_update_state_71)[name = string("op_5051_cast_fp16")]; tensor K_layer_cache_15_axes_0 = const()[name = string("K_layer_cache_15_axes_0"), val = tensor([0])]; tensor K_layer_cache_15_cast_fp16 = squeeze(axes = K_layer_cache_15_axes_0, x = var_5051_cast_fp16)[name = string("K_layer_cache_15_cast_fp16")]; tensor var_5058_begin_0 = const()[name = string("op_5058_begin_0"), val = tensor([35, 0, 0, 0])]; tensor var_5058_end_0 = const()[name = string("op_5058_end_0"), val = tensor([36, 8, 1024, 128])]; tensor var_5058_end_mask_0 = const()[name = string("op_5058_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_5058_cast_fp16 = slice_by_index(begin = var_5058_begin_0, end = var_5058_end_0, end_mask = var_5058_end_mask_0, x = coreml_update_state_71)[name = string("op_5058_cast_fp16")]; tensor V_layer_cache_15_axes_0 = const()[name = string("V_layer_cache_15_axes_0"), val = tensor([0])]; tensor V_layer_cache_15_cast_fp16 = squeeze(axes = V_layer_cache_15_axes_0, x = var_5058_cast_fp16)[name = string("V_layer_cache_15_cast_fp16")]; tensor x_147_axes_0 = const()[name = string("x_147_axes_0"), val = tensor([1])]; tensor x_147_cast_fp16 = expand_dims(axes = x_147_axes_0, x = K_layer_cache_15_cast_fp16)[name = string("x_147_cast_fp16")]; tensor var_5095 = const()[name = string("op_5095"), val = tensor([1, 2, 1, 1])]; tensor x_149_cast_fp16 = tile(reps = var_5095, x = x_147_cast_fp16)[name = string("x_149_cast_fp16")]; tensor var_5107 = const()[name = string("op_5107"), val = tensor([1, -1, 1024, 128])]; tensor key_states_31_cast_fp16 = reshape(shape = var_5107, x = x_149_cast_fp16)[name = string("key_states_31_cast_fp16")]; tensor x_153_axes_0 = const()[name = string("x_153_axes_0"), val = tensor([1])]; tensor x_153_cast_fp16 = expand_dims(axes = x_153_axes_0, x = V_layer_cache_15_cast_fp16)[name = string("x_153_cast_fp16")]; tensor var_5115 = const()[name = string("op_5115"), val = tensor([1, 2, 1, 1])]; tensor x_155_cast_fp16 = tile(reps = var_5115, x = x_153_cast_fp16)[name = string("x_155_cast_fp16")]; tensor var_5127 = const()[name = string("op_5127"), val = tensor([1, -1, 1024, 128])]; tensor value_states_45_cast_fp16 = reshape(shape = var_5127, x = x_155_cast_fp16)[name = string("value_states_45_cast_fp16")]; bool var_5142_transpose_x_1 = const()[name = string("op_5142_transpose_x_1"), val = bool(false)]; bool var_5142_transpose_y_1 = const()[name = string("op_5142_transpose_y_1"), val = bool(true)]; tensor var_5142 = matmul(transpose_x = var_5142_transpose_x_1, transpose_y = var_5142_transpose_y_1, x = query_states_29, y = key_states_31_cast_fp16)[name = string("op_5142")]; fp16 var_5143_to_fp16 = const()[name = string("op_5143_to_fp16"), val = fp16(0x1.6ap-4)]; tensor attn_weights_43_cast_fp16 = mul(x = var_5142, y = var_5143_to_fp16)[name = string("attn_weights_43_cast_fp16")]; tensor attn_weights_45_cast_fp16 = add(x = attn_weights_43_cast_fp16, y = causal_mask)[name = string("attn_weights_45_cast_fp16")]; int32 var_5178 = const()[name = string("op_5178"), val = int32(-1)]; tensor attn_weights_47_cast_fp16 = softmax(axis = var_5178, x = attn_weights_45_cast_fp16)[name = string("attn_weights_47_cast_fp16")]; bool attn_output_71_transpose_x_0 = const()[name = string("attn_output_71_transpose_x_0"), val = bool(false)]; bool attn_output_71_transpose_y_0 = const()[name = string("attn_output_71_transpose_y_0"), val = bool(false)]; tensor attn_output_71_cast_fp16 = matmul(transpose_x = attn_output_71_transpose_x_0, transpose_y = attn_output_71_transpose_y_0, x = attn_weights_47_cast_fp16, y = value_states_45_cast_fp16)[name = string("attn_output_71_cast_fp16")]; tensor var_5189_perm_0 = const()[name = string("op_5189_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_5193 = const()[name = string("op_5193"), val = tensor([1, 1, 2048])]; tensor var_5189_cast_fp16 = transpose(perm = var_5189_perm_0, x = attn_output_71_cast_fp16)[name = string("transpose_124")]; tensor attn_output_75_cast_fp16 = reshape(shape = var_5193, x = var_5189_cast_fp16)[name = string("attn_output_75_cast_fp16")]; tensor var_5198 = const()[name = string("op_5198"), val = tensor([0, 2, 1])]; string var_5214_pad_type_0 = const()[name = string("op_5214_pad_type_0"), val = string("valid")]; int32 var_5214_groups_0 = const()[name = string("op_5214_groups_0"), val = int32(1)]; tensor var_5214_strides_0 = const()[name = string("op_5214_strides_0"), val = tensor([1])]; tensor var_5214_pad_0 = const()[name = string("op_5214_pad_0"), val = tensor([0, 0])]; tensor var_5214_dilations_0 = const()[name = string("op_5214_dilations_0"), val = tensor([1])]; tensor squeeze_7_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1033035776))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1036181568))))[name = string("squeeze_7_cast_fp16_to_fp32_to_fp16_palettized")]; tensor var_5199_cast_fp16 = transpose(perm = var_5198, x = attn_output_75_cast_fp16)[name = string("transpose_123")]; tensor var_5214_cast_fp16 = conv(dilations = var_5214_dilations_0, groups = var_5214_groups_0, pad = var_5214_pad_0, pad_type = var_5214_pad_type_0, strides = var_5214_strides_0, weight = squeeze_7_cast_fp16_to_fp32_to_fp16_palettized, x = var_5199_cast_fp16)[name = string("op_5214_cast_fp16")]; tensor var_5218 = const()[name = string("op_5218"), val = tensor([0, 2, 1])]; tensor attn_output_79_cast_fp16 = transpose(perm = var_5218, x = var_5214_cast_fp16)[name = string("transpose_122")]; tensor hidden_states_47_cast_fp16 = add(x = hidden_states_43_cast_fp16, y = attn_output_79_cast_fp16)[name = string("hidden_states_47_cast_fp16")]; tensor mean_63_axes_0 = const()[name = string("mean_63_axes_0"), val = tensor([-1])]; bool mean_63_keep_dims_0 = const()[name = string("mean_63_keep_dims_0"), val = bool(true)]; tensor mean_63_cast_fp16 = reduce_mean(axes = mean_63_axes_0, keep_dims = mean_63_keep_dims_0, x = hidden_states_47_cast_fp16)[name = string("mean_63_cast_fp16")]; tensor input_137_cast_fp16 = sub(x = hidden_states_47_cast_fp16, y = mean_63_cast_fp16)[name = string("input_137_cast_fp16")]; tensor var_5237_axes_0 = const()[name = string("op_5237_axes_0"), val = tensor([-1])]; tensor model_model_layers_7_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_7_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1036214400)))]; fp16 var_5225_to_fp16 = const()[name = string("op_5225_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_5237_cast_fp16 = layer_norm(axes = var_5237_axes_0, epsilon = var_5225_to_fp16, gamma = model_model_layers_7_post_attention_layernorm_weight_to_fp16, x = input_137_cast_fp16)[name = string("op_5237_cast_fp16")]; tensor var_5251 = const()[name = string("op_5251"), val = tensor([0, 2, 1])]; tensor input_139_axes_0 = const()[name = string("input_139_axes_0"), val = tensor([2])]; tensor var_5252 = transpose(perm = var_5251, x = var_5237_cast_fp16)[name = string("transpose_121")]; tensor input_139 = expand_dims(axes = input_139_axes_0, x = var_5252)[name = string("input_139")]; string input_141_pad_type_0 = const()[name = string("input_141_pad_type_0"), val = string("valid")]; tensor input_141_strides_0 = const()[name = string("input_141_strides_0"), val = tensor([1, 1])]; tensor input_141_pad_0 = const()[name = string("input_141_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_141_dilations_0 = const()[name = string("input_141_dilations_0"), val = tensor([1, 1])]; int32 input_141_groups_0 = const()[name = string("input_141_groups_0"), val = int32(1)]; tensor input_141 = conv(dilations = input_141_dilations_0, groups = input_141_groups_0, pad = input_141_pad_0, pad_type = input_141_pad_type_0, strides = input_141_strides_0, weight = model_model_layers_7_mlp_gate_proj_weight_palettized, x = input_139)[name = string("input_141")]; string b_15_pad_type_0 = const()[name = string("b_15_pad_type_0"), val = string("valid")]; tensor b_15_strides_0 = const()[name = string("b_15_strides_0"), val = tensor([1, 1])]; tensor b_15_pad_0 = const()[name = string("b_15_pad_0"), val = tensor([0, 0, 0, 0])]; tensor b_15_dilations_0 = const()[name = string("b_15_dilations_0"), val = tensor([1, 1])]; int32 b_15_groups_0 = const()[name = string("b_15_groups_0"), val = int32(1)]; tensor b_15 = conv(dilations = b_15_dilations_0, groups = b_15_groups_0, pad = b_15_pad_0, pad_type = b_15_pad_type_0, strides = b_15_strides_0, weight = model_model_layers_7_mlp_up_proj_weight_palettized, x = input_139)[name = string("b_15")]; tensor c_15 = silu(x = input_141)[name = string("c_15")]; tensor input_143 = mul(x = c_15, y = b_15)[name = string("input_143")]; string e_15_pad_type_0 = const()[name = string("e_15_pad_type_0"), val = string("valid")]; tensor e_15_strides_0 = const()[name = string("e_15_strides_0"), val = tensor([1, 1])]; tensor e_15_pad_0 = const()[name = string("e_15_pad_0"), val = tensor([0, 0, 0, 0])]; tensor e_15_dilations_0 = const()[name = string("e_15_dilations_0"), val = tensor([1, 1])]; int32 e_15_groups_0 = const()[name = string("e_15_groups_0"), val = int32(1)]; tensor e_15 = conv(dilations = e_15_dilations_0, groups = e_15_groups_0, pad = e_15_pad_0, pad_type = e_15_pad_type_0, strides = e_15_strides_0, weight = model_model_layers_7_mlp_down_proj_weight_palettized, x = input_143)[name = string("e_15")]; tensor var_5274_axes_0 = const()[name = string("op_5274_axes_0"), val = tensor([2])]; tensor var_5274 = squeeze(axes = var_5274_axes_0, x = e_15)[name = string("op_5274")]; tensor var_5275 = const()[name = string("op_5275"), val = tensor([0, 2, 1])]; tensor var_5276 = transpose(perm = var_5275, x = var_5274)[name = string("transpose_120")]; tensor hidden_states_49_cast_fp16 = add(x = hidden_states_47_cast_fp16, y = var_5276)[name = string("hidden_states_49_cast_fp16")]; tensor mean_65_axes_0 = const()[name = string("mean_65_axes_0"), val = tensor([-1])]; bool mean_65_keep_dims_0 = const()[name = string("mean_65_keep_dims_0"), val = bool(true)]; tensor mean_65_cast_fp16 = reduce_mean(axes = mean_65_axes_0, keep_dims = mean_65_keep_dims_0, x = hidden_states_49_cast_fp16)[name = string("mean_65_cast_fp16")]; tensor input_145_cast_fp16 = sub(x = hidden_states_49_cast_fp16, y = mean_65_cast_fp16)[name = string("input_145_cast_fp16")]; tensor var_5294_axes_0 = const()[name = string("op_5294_axes_0"), val = tensor([-1])]; tensor model_model_layers_8_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_8_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1036218560)))]; fp16 var_5282_to_fp16 = const()[name = string("op_5282_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_5294_cast_fp16 = layer_norm(axes = var_5294_axes_0, epsilon = var_5282_to_fp16, gamma = model_model_layers_8_input_layernorm_weight_to_fp16, x = input_145_cast_fp16)[name = string("op_5294_cast_fp16")]; tensor var_5300 = const()[name = string("op_5300"), val = tensor([0, 2, 1])]; tensor var_5303_axes_0 = const()[name = string("op_5303_axes_0"), val = tensor([2])]; tensor var_5301 = transpose(perm = var_5300, x = var_5294_cast_fp16)[name = string("transpose_119")]; tensor var_5303 = expand_dims(axes = var_5303_axes_0, x = var_5301)[name = string("op_5303")]; string var_5319_pad_type_0 = const()[name = string("op_5319_pad_type_0"), val = string("valid")]; tensor var_5319_strides_0 = const()[name = string("op_5319_strides_0"), val = tensor([1, 1])]; tensor var_5319_pad_0 = const()[name = string("op_5319_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_5319_dilations_0 = const()[name = string("op_5319_dilations_0"), val = tensor([1, 1])]; int32 var_5319_groups_0 = const()[name = string("op_5319_groups_0"), val = int32(1)]; tensor var_5319 = conv(dilations = var_5319_dilations_0, groups = var_5319_groups_0, pad = var_5319_pad_0, pad_type = var_5319_pad_type_0, strides = var_5319_strides_0, weight = model_model_layers_8_self_attn_q_proj_weight_palettized, x = var_5303)[name = string("op_5319")]; tensor var_5324 = const()[name = string("op_5324"), val = tensor([1, 16, 1, 128])]; tensor var_5325 = reshape(shape = var_5324, x = var_5319)[name = string("op_5325")]; string var_5341_pad_type_0 = const()[name = string("op_5341_pad_type_0"), val = string("valid")]; tensor var_5341_strides_0 = const()[name = string("op_5341_strides_0"), val = tensor([1, 1])]; tensor var_5341_pad_0 = const()[name = string("op_5341_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_5341_dilations_0 = const()[name = string("op_5341_dilations_0"), val = tensor([1, 1])]; int32 var_5341_groups_0 = const()[name = string("op_5341_groups_0"), val = int32(1)]; tensor var_5341 = conv(dilations = var_5341_dilations_0, groups = var_5341_groups_0, pad = var_5341_pad_0, pad_type = var_5341_pad_type_0, strides = var_5341_strides_0, weight = model_model_layers_8_self_attn_k_proj_weight_palettized, x = var_5303)[name = string("op_5341")]; tensor var_5346 = const()[name = string("op_5346"), val = tensor([1, 8, 1, 128])]; tensor var_5347 = reshape(shape = var_5346, x = var_5341)[name = string("op_5347")]; string var_5363_pad_type_0 = const()[name = string("op_5363_pad_type_0"), val = string("valid")]; tensor var_5363_strides_0 = const()[name = string("op_5363_strides_0"), val = tensor([1, 1])]; tensor var_5363_pad_0 = const()[name = string("op_5363_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_5363_dilations_0 = const()[name = string("op_5363_dilations_0"), val = tensor([1, 1])]; int32 var_5363_groups_0 = const()[name = string("op_5363_groups_0"), val = int32(1)]; tensor var_5363 = conv(dilations = var_5363_dilations_0, groups = var_5363_groups_0, pad = var_5363_pad_0, pad_type = var_5363_pad_type_0, strides = var_5363_strides_0, weight = model_model_layers_8_self_attn_v_proj_weight_palettized, x = var_5303)[name = string("op_5363")]; tensor var_5368 = const()[name = string("op_5368"), val = tensor([1, 8, 1, 128])]; tensor var_5369 = reshape(shape = var_5368, x = var_5363)[name = string("op_5369")]; tensor mean_67_axes_0 = const()[name = string("mean_67_axes_0"), val = tensor([-1])]; bool mean_67_keep_dims_0 = const()[name = string("mean_67_keep_dims_0"), val = bool(true)]; tensor mean_67 = reduce_mean(axes = mean_67_axes_0, keep_dims = mean_67_keep_dims_0, x = var_5325)[name = string("mean_67")]; tensor input_149 = sub(x = var_5325, y = mean_67)[name = string("input_149")]; tensor var_5390_axes_0 = const()[name = string("op_5390_axes_0"), val = tensor([-1])]; tensor model_model_layers_8_self_attn_q_norm_weight_to_fp16 = const()[name = string("model_model_layers_8_self_attn_q_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1036222720)))]; fp16 var_5378_to_fp16 = const()[name = string("op_5378_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_5390_cast_fp16 = layer_norm(axes = var_5390_axes_0, epsilon = var_5378_to_fp16, gamma = model_model_layers_8_self_attn_q_norm_weight_to_fp16, x = input_149)[name = string("op_5390_cast_fp16")]; tensor mean_69_axes_0 = const()[name = string("mean_69_axes_0"), val = tensor([-1])]; bool mean_69_keep_dims_0 = const()[name = string("mean_69_keep_dims_0"), val = bool(true)]; tensor mean_69 = reduce_mean(axes = mean_69_axes_0, keep_dims = mean_69_keep_dims_0, x = var_5347)[name = string("mean_69")]; tensor input_151 = sub(x = var_5347, y = mean_69)[name = string("input_151")]; tensor var_5408_axes_0 = const()[name = string("op_5408_axes_0"), val = tensor([-1])]; tensor model_model_layers_8_self_attn_k_norm_weight_to_fp16 = const()[name = string("model_model_layers_8_self_attn_k_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1036223040)))]; fp16 var_5396_to_fp16 = const()[name = string("op_5396_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_5408_cast_fp16 = layer_norm(axes = var_5408_axes_0, epsilon = var_5396_to_fp16, gamma = model_model_layers_8_self_attn_k_norm_weight_to_fp16, x = input_151)[name = string("op_5408_cast_fp16")]; tensor var_5411 = mul(x = var_5390_cast_fp16, y = cos_1_cast_fp16)[name = string("op_5411")]; tensor x1_33_begin_0 = const()[name = string("x1_33_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_33_end_0 = const()[name = string("x1_33_end_0"), val = tensor([1, 16, 1, 64])]; tensor x1_33_end_mask_0 = const()[name = string("x1_33_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_33 = slice_by_index(begin = x1_33_begin_0, end = x1_33_end_0, end_mask = x1_33_end_mask_0, x = var_5390_cast_fp16)[name = string("x1_33")]; tensor x2_33_begin_0 = const()[name = string("x2_33_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_33_end_0 = const()[name = string("x2_33_end_0"), val = tensor([1, 16, 1, 128])]; tensor x2_33_end_mask_0 = const()[name = string("x2_33_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_33 = slice_by_index(begin = x2_33_begin_0, end = x2_33_end_0, end_mask = x2_33_end_mask_0, x = var_5390_cast_fp16)[name = string("x2_33")]; fp16 const_149_promoted = const()[name = string("const_149_promoted"), val = fp16(-0x1p+0)]; tensor var_5432 = mul(x = x2_33, y = const_149_promoted)[name = string("op_5432")]; int32 var_5434 = const()[name = string("op_5434"), val = int32(-1)]; bool var_5435_interleave_0 = const()[name = string("op_5435_interleave_0"), val = bool(false)]; tensor var_5435 = concat(axis = var_5434, interleave = var_5435_interleave_0, values = (var_5432, x1_33))[name = string("op_5435")]; tensor var_5436 = mul(x = var_5435, y = sin_1_cast_fp16)[name = string("op_5436")]; tensor query_states_33 = add(x = var_5411, y = var_5436)[name = string("query_states_33")]; tensor var_5439 = mul(x = var_5408_cast_fp16, y = cos_1_cast_fp16)[name = string("op_5439")]; tensor x1_35_begin_0 = const()[name = string("x1_35_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_35_end_0 = const()[name = string("x1_35_end_0"), val = tensor([1, 8, 1, 64])]; tensor x1_35_end_mask_0 = const()[name = string("x1_35_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_35 = slice_by_index(begin = x1_35_begin_0, end = x1_35_end_0, end_mask = x1_35_end_mask_0, x = var_5408_cast_fp16)[name = string("x1_35")]; tensor x2_35_begin_0 = const()[name = string("x2_35_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_35_end_0 = const()[name = string("x2_35_end_0"), val = tensor([1, 8, 1, 128])]; tensor x2_35_end_mask_0 = const()[name = string("x2_35_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_35 = slice_by_index(begin = x2_35_begin_0, end = x2_35_end_0, end_mask = x2_35_end_mask_0, x = var_5408_cast_fp16)[name = string("x2_35")]; fp16 const_152_promoted = const()[name = string("const_152_promoted"), val = fp16(-0x1p+0)]; tensor var_5460 = mul(x = x2_35, y = const_152_promoted)[name = string("op_5460")]; int32 var_5462 = const()[name = string("op_5462"), val = int32(-1)]; bool var_5463_interleave_0 = const()[name = string("op_5463_interleave_0"), val = bool(false)]; tensor var_5463 = concat(axis = var_5462, interleave = var_5463_interleave_0, values = (var_5460, x1_35))[name = string("op_5463")]; tensor var_5464 = mul(x = var_5463, y = sin_1_cast_fp16)[name = string("op_5464")]; tensor key_states_33 = add(x = var_5439, y = var_5464)[name = string("key_states_33")]; tensor expand_dims_96 = const()[name = string("expand_dims_96"), val = tensor([8])]; tensor expand_dims_97 = const()[name = string("expand_dims_97"), val = tensor([0])]; tensor expand_dims_99 = const()[name = string("expand_dims_99"), val = tensor([0])]; tensor expand_dims_100 = const()[name = string("expand_dims_100"), val = tensor([9])]; int32 concat_66_axis_0 = const()[name = string("concat_66_axis_0"), val = int32(0)]; bool concat_66_interleave_0 = const()[name = string("concat_66_interleave_0"), val = bool(false)]; tensor concat_66 = concat(axis = concat_66_axis_0, interleave = concat_66_interleave_0, values = (expand_dims_96, expand_dims_97, current_pos, expand_dims_99))[name = string("concat_66")]; tensor concat_67_values1_0 = const()[name = string("concat_67_values1_0"), val = tensor([0])]; tensor concat_67_values3_0 = const()[name = string("concat_67_values3_0"), val = tensor([0])]; int32 concat_67_axis_0 = const()[name = string("concat_67_axis_0"), val = int32(0)]; bool concat_67_interleave_0 = const()[name = string("concat_67_interleave_0"), val = bool(false)]; tensor concat_67 = concat(axis = concat_67_axis_0, interleave = concat_67_interleave_0, values = (expand_dims_100, concat_67_values1_0, var_1725, concat_67_values3_0))[name = string("concat_67")]; tensor model_model_kv_cache_0_internal_tensor_assign_17_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_17_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_17_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_17_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_17_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_17_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_17_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_17_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_17_cast_fp16 = slice_update(begin = concat_66, begin_mask = model_model_kv_cache_0_internal_tensor_assign_17_begin_mask_0, end = concat_67, end_mask = model_model_kv_cache_0_internal_tensor_assign_17_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_17_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_17_stride_0, update = key_states_33, x = coreml_update_state_71)[name = string("model_model_kv_cache_0_internal_tensor_assign_17_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_17_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_16_write_state")]; tensor coreml_update_state_72 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_16")]; tensor expand_dims_102 = const()[name = string("expand_dims_102"), val = tensor([36])]; tensor expand_dims_103 = const()[name = string("expand_dims_103"), val = tensor([0])]; tensor expand_dims_105 = const()[name = string("expand_dims_105"), val = tensor([0])]; tensor expand_dims_106 = const()[name = string("expand_dims_106"), val = tensor([37])]; int32 concat_70_axis_0 = const()[name = string("concat_70_axis_0"), val = int32(0)]; bool concat_70_interleave_0 = const()[name = string("concat_70_interleave_0"), val = bool(false)]; tensor concat_70 = concat(axis = concat_70_axis_0, interleave = concat_70_interleave_0, values = (expand_dims_102, expand_dims_103, current_pos, expand_dims_105))[name = string("concat_70")]; tensor concat_71_values1_0 = const()[name = string("concat_71_values1_0"), val = tensor([0])]; tensor concat_71_values3_0 = const()[name = string("concat_71_values3_0"), val = tensor([0])]; int32 concat_71_axis_0 = const()[name = string("concat_71_axis_0"), val = int32(0)]; bool concat_71_interleave_0 = const()[name = string("concat_71_interleave_0"), val = bool(false)]; tensor concat_71 = concat(axis = concat_71_axis_0, interleave = concat_71_interleave_0, values = (expand_dims_106, concat_71_values1_0, var_1725, concat_71_values3_0))[name = string("concat_71")]; tensor model_model_kv_cache_0_internal_tensor_assign_18_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_18_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_18_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_18_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_18_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_18_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_18_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_18_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_18_cast_fp16 = slice_update(begin = concat_70, begin_mask = model_model_kv_cache_0_internal_tensor_assign_18_begin_mask_0, end = concat_71, end_mask = model_model_kv_cache_0_internal_tensor_assign_18_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_18_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_18_stride_0, update = var_5369, x = coreml_update_state_72)[name = string("model_model_kv_cache_0_internal_tensor_assign_18_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_18_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_17_write_state")]; tensor coreml_update_state_73 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_17")]; tensor var_5519_begin_0 = const()[name = string("op_5519_begin_0"), val = tensor([8, 0, 0, 0])]; tensor var_5519_end_0 = const()[name = string("op_5519_end_0"), val = tensor([9, 8, 1024, 128])]; tensor var_5519_end_mask_0 = const()[name = string("op_5519_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_5519_cast_fp16 = slice_by_index(begin = var_5519_begin_0, end = var_5519_end_0, end_mask = var_5519_end_mask_0, x = coreml_update_state_73)[name = string("op_5519_cast_fp16")]; tensor K_layer_cache_17_axes_0 = const()[name = string("K_layer_cache_17_axes_0"), val = tensor([0])]; tensor K_layer_cache_17_cast_fp16 = squeeze(axes = K_layer_cache_17_axes_0, x = var_5519_cast_fp16)[name = string("K_layer_cache_17_cast_fp16")]; tensor var_5526_begin_0 = const()[name = string("op_5526_begin_0"), val = tensor([36, 0, 0, 0])]; tensor var_5526_end_0 = const()[name = string("op_5526_end_0"), val = tensor([37, 8, 1024, 128])]; tensor var_5526_end_mask_0 = const()[name = string("op_5526_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_5526_cast_fp16 = slice_by_index(begin = var_5526_begin_0, end = var_5526_end_0, end_mask = var_5526_end_mask_0, x = coreml_update_state_73)[name = string("op_5526_cast_fp16")]; tensor V_layer_cache_17_axes_0 = const()[name = string("V_layer_cache_17_axes_0"), val = tensor([0])]; tensor V_layer_cache_17_cast_fp16 = squeeze(axes = V_layer_cache_17_axes_0, x = var_5526_cast_fp16)[name = string("V_layer_cache_17_cast_fp16")]; tensor x_167_axes_0 = const()[name = string("x_167_axes_0"), val = tensor([1])]; tensor x_167_cast_fp16 = expand_dims(axes = x_167_axes_0, x = K_layer_cache_17_cast_fp16)[name = string("x_167_cast_fp16")]; tensor var_5563 = const()[name = string("op_5563"), val = tensor([1, 2, 1, 1])]; tensor x_169_cast_fp16 = tile(reps = var_5563, x = x_167_cast_fp16)[name = string("x_169_cast_fp16")]; tensor var_5575 = const()[name = string("op_5575"), val = tensor([1, -1, 1024, 128])]; tensor key_states_35_cast_fp16 = reshape(shape = var_5575, x = x_169_cast_fp16)[name = string("key_states_35_cast_fp16")]; tensor x_173_axes_0 = const()[name = string("x_173_axes_0"), val = tensor([1])]; tensor x_173_cast_fp16 = expand_dims(axes = x_173_axes_0, x = V_layer_cache_17_cast_fp16)[name = string("x_173_cast_fp16")]; tensor var_5583 = const()[name = string("op_5583"), val = tensor([1, 2, 1, 1])]; tensor x_175_cast_fp16 = tile(reps = var_5583, x = x_173_cast_fp16)[name = string("x_175_cast_fp16")]; tensor var_5595 = const()[name = string("op_5595"), val = tensor([1, -1, 1024, 128])]; tensor value_states_51_cast_fp16 = reshape(shape = var_5595, x = x_175_cast_fp16)[name = string("value_states_51_cast_fp16")]; bool var_5610_transpose_x_1 = const()[name = string("op_5610_transpose_x_1"), val = bool(false)]; bool var_5610_transpose_y_1 = const()[name = string("op_5610_transpose_y_1"), val = bool(true)]; tensor var_5610 = matmul(transpose_x = var_5610_transpose_x_1, transpose_y = var_5610_transpose_y_1, x = query_states_33, y = key_states_35_cast_fp16)[name = string("op_5610")]; fp16 var_5611_to_fp16 = const()[name = string("op_5611_to_fp16"), val = fp16(0x1.6ap-4)]; tensor attn_weights_49_cast_fp16 = mul(x = var_5610, y = var_5611_to_fp16)[name = string("attn_weights_49_cast_fp16")]; tensor attn_weights_51_cast_fp16 = add(x = attn_weights_49_cast_fp16, y = causal_mask)[name = string("attn_weights_51_cast_fp16")]; int32 var_5646 = const()[name = string("op_5646"), val = int32(-1)]; tensor attn_weights_53_cast_fp16 = softmax(axis = var_5646, x = attn_weights_51_cast_fp16)[name = string("attn_weights_53_cast_fp16")]; bool attn_output_81_transpose_x_0 = const()[name = string("attn_output_81_transpose_x_0"), val = bool(false)]; bool attn_output_81_transpose_y_0 = const()[name = string("attn_output_81_transpose_y_0"), val = bool(false)]; tensor attn_output_81_cast_fp16 = matmul(transpose_x = attn_output_81_transpose_x_0, transpose_y = attn_output_81_transpose_y_0, x = attn_weights_53_cast_fp16, y = value_states_51_cast_fp16)[name = string("attn_output_81_cast_fp16")]; tensor var_5657_perm_0 = const()[name = string("op_5657_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_5661 = const()[name = string("op_5661"), val = tensor([1, 1, 2048])]; tensor var_5657_cast_fp16 = transpose(perm = var_5657_perm_0, x = attn_output_81_cast_fp16)[name = string("transpose_118")]; tensor attn_output_85_cast_fp16 = reshape(shape = var_5661, x = var_5657_cast_fp16)[name = string("attn_output_85_cast_fp16")]; tensor var_5666 = const()[name = string("op_5666"), val = tensor([0, 2, 1])]; string var_5682_pad_type_0 = const()[name = string("op_5682_pad_type_0"), val = string("valid")]; int32 var_5682_groups_0 = const()[name = string("op_5682_groups_0"), val = int32(1)]; tensor var_5682_strides_0 = const()[name = string("op_5682_strides_0"), val = tensor([1])]; tensor var_5682_pad_0 = const()[name = string("op_5682_pad_0"), val = tensor([0, 0])]; tensor var_5682_dilations_0 = const()[name = string("op_5682_dilations_0"), val = tensor([1])]; tensor squeeze_8_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1036223360))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1039369152))))[name = string("squeeze_8_cast_fp16_to_fp32_to_fp16_palettized")]; tensor var_5667_cast_fp16 = transpose(perm = var_5666, x = attn_output_85_cast_fp16)[name = string("transpose_117")]; tensor var_5682_cast_fp16 = conv(dilations = var_5682_dilations_0, groups = var_5682_groups_0, pad = var_5682_pad_0, pad_type = var_5682_pad_type_0, strides = var_5682_strides_0, weight = squeeze_8_cast_fp16_to_fp32_to_fp16_palettized, x = var_5667_cast_fp16)[name = string("op_5682_cast_fp16")]; tensor var_5686 = const()[name = string("op_5686"), val = tensor([0, 2, 1])]; tensor attn_output_89_cast_fp16 = transpose(perm = var_5686, x = var_5682_cast_fp16)[name = string("transpose_116")]; tensor hidden_states_53_cast_fp16 = add(x = hidden_states_49_cast_fp16, y = attn_output_89_cast_fp16)[name = string("hidden_states_53_cast_fp16")]; tensor mean_71_axes_0 = const()[name = string("mean_71_axes_0"), val = tensor([-1])]; bool mean_71_keep_dims_0 = const()[name = string("mean_71_keep_dims_0"), val = bool(true)]; tensor mean_71_cast_fp16 = reduce_mean(axes = mean_71_axes_0, keep_dims = mean_71_keep_dims_0, x = hidden_states_53_cast_fp16)[name = string("mean_71_cast_fp16")]; tensor input_155_cast_fp16 = sub(x = hidden_states_53_cast_fp16, y = mean_71_cast_fp16)[name = string("input_155_cast_fp16")]; tensor var_5705_axes_0 = const()[name = string("op_5705_axes_0"), val = tensor([-1])]; tensor model_model_layers_8_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_8_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1039401984)))]; fp16 var_5693_to_fp16 = const()[name = string("op_5693_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_5705_cast_fp16 = layer_norm(axes = var_5705_axes_0, epsilon = var_5693_to_fp16, gamma = model_model_layers_8_post_attention_layernorm_weight_to_fp16, x = input_155_cast_fp16)[name = string("op_5705_cast_fp16")]; tensor var_5719 = const()[name = string("op_5719"), val = tensor([0, 2, 1])]; tensor input_157_axes_0 = const()[name = string("input_157_axes_0"), val = tensor([2])]; tensor var_5720 = transpose(perm = var_5719, x = var_5705_cast_fp16)[name = string("transpose_115")]; tensor input_157 = expand_dims(axes = input_157_axes_0, x = var_5720)[name = string("input_157")]; string input_159_pad_type_0 = const()[name = string("input_159_pad_type_0"), val = string("valid")]; tensor input_159_strides_0 = const()[name = string("input_159_strides_0"), val = tensor([1, 1])]; tensor input_159_pad_0 = const()[name = string("input_159_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_159_dilations_0 = const()[name = string("input_159_dilations_0"), val = tensor([1, 1])]; int32 input_159_groups_0 = const()[name = string("input_159_groups_0"), val = int32(1)]; tensor input_159 = conv(dilations = input_159_dilations_0, groups = input_159_groups_0, pad = input_159_pad_0, pad_type = input_159_pad_type_0, strides = input_159_strides_0, weight = model_model_layers_8_mlp_gate_proj_weight_palettized, x = input_157)[name = string("input_159")]; string b_17_pad_type_0 = const()[name = string("b_17_pad_type_0"), val = string("valid")]; tensor b_17_strides_0 = const()[name = string("b_17_strides_0"), val = tensor([1, 1])]; tensor b_17_pad_0 = const()[name = string("b_17_pad_0"), val = tensor([0, 0, 0, 0])]; tensor b_17_dilations_0 = const()[name = string("b_17_dilations_0"), val = tensor([1, 1])]; int32 b_17_groups_0 = const()[name = string("b_17_groups_0"), val = int32(1)]; tensor b_17 = conv(dilations = b_17_dilations_0, groups = b_17_groups_0, pad = b_17_pad_0, pad_type = b_17_pad_type_0, strides = b_17_strides_0, weight = model_model_layers_8_mlp_up_proj_weight_palettized, x = input_157)[name = string("b_17")]; tensor c_17 = silu(x = input_159)[name = string("c_17")]; tensor input_161 = mul(x = c_17, y = b_17)[name = string("input_161")]; string e_17_pad_type_0 = const()[name = string("e_17_pad_type_0"), val = string("valid")]; tensor e_17_strides_0 = const()[name = string("e_17_strides_0"), val = tensor([1, 1])]; tensor e_17_pad_0 = const()[name = string("e_17_pad_0"), val = tensor([0, 0, 0, 0])]; tensor e_17_dilations_0 = const()[name = string("e_17_dilations_0"), val = tensor([1, 1])]; int32 e_17_groups_0 = const()[name = string("e_17_groups_0"), val = int32(1)]; tensor e_17 = conv(dilations = e_17_dilations_0, groups = e_17_groups_0, pad = e_17_pad_0, pad_type = e_17_pad_type_0, strides = e_17_strides_0, weight = model_model_layers_8_mlp_down_proj_weight_palettized, x = input_161)[name = string("e_17")]; tensor var_5742_axes_0 = const()[name = string("op_5742_axes_0"), val = tensor([2])]; tensor var_5742 = squeeze(axes = var_5742_axes_0, x = e_17)[name = string("op_5742")]; tensor var_5743 = const()[name = string("op_5743"), val = tensor([0, 2, 1])]; tensor var_5744 = transpose(perm = var_5743, x = var_5742)[name = string("transpose_114")]; tensor hidden_states_55_cast_fp16 = add(x = hidden_states_53_cast_fp16, y = var_5744)[name = string("hidden_states_55_cast_fp16")]; tensor mean_73_axes_0 = const()[name = string("mean_73_axes_0"), val = tensor([-1])]; bool mean_73_keep_dims_0 = const()[name = string("mean_73_keep_dims_0"), val = bool(true)]; tensor mean_73_cast_fp16 = reduce_mean(axes = mean_73_axes_0, keep_dims = mean_73_keep_dims_0, x = hidden_states_55_cast_fp16)[name = string("mean_73_cast_fp16")]; tensor input_163_cast_fp16 = sub(x = hidden_states_55_cast_fp16, y = mean_73_cast_fp16)[name = string("input_163_cast_fp16")]; tensor var_5762_axes_0 = const()[name = string("op_5762_axes_0"), val = tensor([-1])]; tensor model_model_layers_9_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_9_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1039406144)))]; fp16 var_5750_to_fp16 = const()[name = string("op_5750_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_5762_cast_fp16 = layer_norm(axes = var_5762_axes_0, epsilon = var_5750_to_fp16, gamma = model_model_layers_9_input_layernorm_weight_to_fp16, x = input_163_cast_fp16)[name = string("op_5762_cast_fp16")]; tensor var_5768 = const()[name = string("op_5768"), val = tensor([0, 2, 1])]; tensor var_5771_axes_0 = const()[name = string("op_5771_axes_0"), val = tensor([2])]; tensor var_5769 = transpose(perm = var_5768, x = var_5762_cast_fp16)[name = string("transpose_113")]; tensor var_5771 = expand_dims(axes = var_5771_axes_0, x = var_5769)[name = string("op_5771")]; string var_5787_pad_type_0 = const()[name = string("op_5787_pad_type_0"), val = string("valid")]; tensor var_5787_strides_0 = const()[name = string("op_5787_strides_0"), val = tensor([1, 1])]; tensor var_5787_pad_0 = const()[name = string("op_5787_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_5787_dilations_0 = const()[name = string("op_5787_dilations_0"), val = tensor([1, 1])]; int32 var_5787_groups_0 = const()[name = string("op_5787_groups_0"), val = int32(1)]; tensor var_5787 = conv(dilations = var_5787_dilations_0, groups = var_5787_groups_0, pad = var_5787_pad_0, pad_type = var_5787_pad_type_0, strides = var_5787_strides_0, weight = model_model_layers_9_self_attn_q_proj_weight_palettized, x = var_5771)[name = string("op_5787")]; tensor var_5792 = const()[name = string("op_5792"), val = tensor([1, 16, 1, 128])]; tensor var_5793 = reshape(shape = var_5792, x = var_5787)[name = string("op_5793")]; string var_5809_pad_type_0 = const()[name = string("op_5809_pad_type_0"), val = string("valid")]; tensor var_5809_strides_0 = const()[name = string("op_5809_strides_0"), val = tensor([1, 1])]; tensor var_5809_pad_0 = const()[name = string("op_5809_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_5809_dilations_0 = const()[name = string("op_5809_dilations_0"), val = tensor([1, 1])]; int32 var_5809_groups_0 = const()[name = string("op_5809_groups_0"), val = int32(1)]; tensor var_5809 = conv(dilations = var_5809_dilations_0, groups = var_5809_groups_0, pad = var_5809_pad_0, pad_type = var_5809_pad_type_0, strides = var_5809_strides_0, weight = model_model_layers_9_self_attn_k_proj_weight_palettized, x = var_5771)[name = string("op_5809")]; tensor var_5814 = const()[name = string("op_5814"), val = tensor([1, 8, 1, 128])]; tensor var_5815 = reshape(shape = var_5814, x = var_5809)[name = string("op_5815")]; string var_5831_pad_type_0 = const()[name = string("op_5831_pad_type_0"), val = string("valid")]; tensor var_5831_strides_0 = const()[name = string("op_5831_strides_0"), val = tensor([1, 1])]; tensor var_5831_pad_0 = const()[name = string("op_5831_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_5831_dilations_0 = const()[name = string("op_5831_dilations_0"), val = tensor([1, 1])]; int32 var_5831_groups_0 = const()[name = string("op_5831_groups_0"), val = int32(1)]; tensor var_5831 = conv(dilations = var_5831_dilations_0, groups = var_5831_groups_0, pad = var_5831_pad_0, pad_type = var_5831_pad_type_0, strides = var_5831_strides_0, weight = model_model_layers_9_self_attn_v_proj_weight_palettized, x = var_5771)[name = string("op_5831")]; tensor var_5836 = const()[name = string("op_5836"), val = tensor([1, 8, 1, 128])]; tensor var_5837 = reshape(shape = var_5836, x = var_5831)[name = string("op_5837")]; tensor mean_75_axes_0 = const()[name = string("mean_75_axes_0"), val = tensor([-1])]; bool mean_75_keep_dims_0 = const()[name = string("mean_75_keep_dims_0"), val = bool(true)]; tensor mean_75 = reduce_mean(axes = mean_75_axes_0, keep_dims = mean_75_keep_dims_0, x = var_5793)[name = string("mean_75")]; tensor input_167 = sub(x = var_5793, y = mean_75)[name = string("input_167")]; tensor var_5858_axes_0 = const()[name = string("op_5858_axes_0"), val = tensor([-1])]; tensor model_model_layers_9_self_attn_q_norm_weight_to_fp16 = const()[name = string("model_model_layers_9_self_attn_q_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1039410304)))]; fp16 var_5846_to_fp16 = const()[name = string("op_5846_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_5858_cast_fp16 = layer_norm(axes = var_5858_axes_0, epsilon = var_5846_to_fp16, gamma = model_model_layers_9_self_attn_q_norm_weight_to_fp16, x = input_167)[name = string("op_5858_cast_fp16")]; tensor mean_77_axes_0 = const()[name = string("mean_77_axes_0"), val = tensor([-1])]; bool mean_77_keep_dims_0 = const()[name = string("mean_77_keep_dims_0"), val = bool(true)]; tensor mean_77 = reduce_mean(axes = mean_77_axes_0, keep_dims = mean_77_keep_dims_0, x = var_5815)[name = string("mean_77")]; tensor input_169 = sub(x = var_5815, y = mean_77)[name = string("input_169")]; tensor var_5876_axes_0 = const()[name = string("op_5876_axes_0"), val = tensor([-1])]; tensor model_model_layers_9_self_attn_k_norm_weight_to_fp16 = const()[name = string("model_model_layers_9_self_attn_k_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1039410624)))]; fp16 var_5864_to_fp16 = const()[name = string("op_5864_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_5876_cast_fp16 = layer_norm(axes = var_5876_axes_0, epsilon = var_5864_to_fp16, gamma = model_model_layers_9_self_attn_k_norm_weight_to_fp16, x = input_169)[name = string("op_5876_cast_fp16")]; tensor var_5879 = mul(x = var_5858_cast_fp16, y = cos_1_cast_fp16)[name = string("op_5879")]; tensor x1_37_begin_0 = const()[name = string("x1_37_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_37_end_0 = const()[name = string("x1_37_end_0"), val = tensor([1, 16, 1, 64])]; tensor x1_37_end_mask_0 = const()[name = string("x1_37_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_37 = slice_by_index(begin = x1_37_begin_0, end = x1_37_end_0, end_mask = x1_37_end_mask_0, x = var_5858_cast_fp16)[name = string("x1_37")]; tensor x2_37_begin_0 = const()[name = string("x2_37_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_37_end_0 = const()[name = string("x2_37_end_0"), val = tensor([1, 16, 1, 128])]; tensor x2_37_end_mask_0 = const()[name = string("x2_37_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_37 = slice_by_index(begin = x2_37_begin_0, end = x2_37_end_0, end_mask = x2_37_end_mask_0, x = var_5858_cast_fp16)[name = string("x2_37")]; fp16 const_167_promoted = const()[name = string("const_167_promoted"), val = fp16(-0x1p+0)]; tensor var_5900 = mul(x = x2_37, y = const_167_promoted)[name = string("op_5900")]; int32 var_5902 = const()[name = string("op_5902"), val = int32(-1)]; bool var_5903_interleave_0 = const()[name = string("op_5903_interleave_0"), val = bool(false)]; tensor var_5903 = concat(axis = var_5902, interleave = var_5903_interleave_0, values = (var_5900, x1_37))[name = string("op_5903")]; tensor var_5904 = mul(x = var_5903, y = sin_1_cast_fp16)[name = string("op_5904")]; tensor query_states_37 = add(x = var_5879, y = var_5904)[name = string("query_states_37")]; tensor var_5907 = mul(x = var_5876_cast_fp16, y = cos_1_cast_fp16)[name = string("op_5907")]; tensor x1_39_begin_0 = const()[name = string("x1_39_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_39_end_0 = const()[name = string("x1_39_end_0"), val = tensor([1, 8, 1, 64])]; tensor x1_39_end_mask_0 = const()[name = string("x1_39_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_39 = slice_by_index(begin = x1_39_begin_0, end = x1_39_end_0, end_mask = x1_39_end_mask_0, x = var_5876_cast_fp16)[name = string("x1_39")]; tensor x2_39_begin_0 = const()[name = string("x2_39_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_39_end_0 = const()[name = string("x2_39_end_0"), val = tensor([1, 8, 1, 128])]; tensor x2_39_end_mask_0 = const()[name = string("x2_39_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_39 = slice_by_index(begin = x2_39_begin_0, end = x2_39_end_0, end_mask = x2_39_end_mask_0, x = var_5876_cast_fp16)[name = string("x2_39")]; fp16 const_170_promoted = const()[name = string("const_170_promoted"), val = fp16(-0x1p+0)]; tensor var_5928 = mul(x = x2_39, y = const_170_promoted)[name = string("op_5928")]; int32 var_5930 = const()[name = string("op_5930"), val = int32(-1)]; bool var_5931_interleave_0 = const()[name = string("op_5931_interleave_0"), val = bool(false)]; tensor var_5931 = concat(axis = var_5930, interleave = var_5931_interleave_0, values = (var_5928, x1_39))[name = string("op_5931")]; tensor var_5932 = mul(x = var_5931, y = sin_1_cast_fp16)[name = string("op_5932")]; tensor key_states_37 = add(x = var_5907, y = var_5932)[name = string("key_states_37")]; tensor expand_dims_108 = const()[name = string("expand_dims_108"), val = tensor([9])]; tensor expand_dims_109 = const()[name = string("expand_dims_109"), val = tensor([0])]; tensor expand_dims_111 = const()[name = string("expand_dims_111"), val = tensor([0])]; tensor expand_dims_112 = const()[name = string("expand_dims_112"), val = tensor([10])]; int32 concat_74_axis_0 = const()[name = string("concat_74_axis_0"), val = int32(0)]; bool concat_74_interleave_0 = const()[name = string("concat_74_interleave_0"), val = bool(false)]; tensor concat_74 = concat(axis = concat_74_axis_0, interleave = concat_74_interleave_0, values = (expand_dims_108, expand_dims_109, current_pos, expand_dims_111))[name = string("concat_74")]; tensor concat_75_values1_0 = const()[name = string("concat_75_values1_0"), val = tensor([0])]; tensor concat_75_values3_0 = const()[name = string("concat_75_values3_0"), val = tensor([0])]; int32 concat_75_axis_0 = const()[name = string("concat_75_axis_0"), val = int32(0)]; bool concat_75_interleave_0 = const()[name = string("concat_75_interleave_0"), val = bool(false)]; tensor concat_75 = concat(axis = concat_75_axis_0, interleave = concat_75_interleave_0, values = (expand_dims_112, concat_75_values1_0, var_1725, concat_75_values3_0))[name = string("concat_75")]; tensor model_model_kv_cache_0_internal_tensor_assign_19_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_19_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_19_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_19_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_19_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_19_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_19_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_19_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_19_cast_fp16 = slice_update(begin = concat_74, begin_mask = model_model_kv_cache_0_internal_tensor_assign_19_begin_mask_0, end = concat_75, end_mask = model_model_kv_cache_0_internal_tensor_assign_19_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_19_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_19_stride_0, update = key_states_37, x = coreml_update_state_73)[name = string("model_model_kv_cache_0_internal_tensor_assign_19_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_19_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_18_write_state")]; tensor coreml_update_state_74 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_18")]; tensor expand_dims_114 = const()[name = string("expand_dims_114"), val = tensor([37])]; tensor expand_dims_115 = const()[name = string("expand_dims_115"), val = tensor([0])]; tensor expand_dims_117 = const()[name = string("expand_dims_117"), val = tensor([0])]; tensor expand_dims_118 = const()[name = string("expand_dims_118"), val = tensor([38])]; int32 concat_78_axis_0 = const()[name = string("concat_78_axis_0"), val = int32(0)]; bool concat_78_interleave_0 = const()[name = string("concat_78_interleave_0"), val = bool(false)]; tensor concat_78 = concat(axis = concat_78_axis_0, interleave = concat_78_interleave_0, values = (expand_dims_114, expand_dims_115, current_pos, expand_dims_117))[name = string("concat_78")]; tensor concat_79_values1_0 = const()[name = string("concat_79_values1_0"), val = tensor([0])]; tensor concat_79_values3_0 = const()[name = string("concat_79_values3_0"), val = tensor([0])]; int32 concat_79_axis_0 = const()[name = string("concat_79_axis_0"), val = int32(0)]; bool concat_79_interleave_0 = const()[name = string("concat_79_interleave_0"), val = bool(false)]; tensor concat_79 = concat(axis = concat_79_axis_0, interleave = concat_79_interleave_0, values = (expand_dims_118, concat_79_values1_0, var_1725, concat_79_values3_0))[name = string("concat_79")]; tensor model_model_kv_cache_0_internal_tensor_assign_20_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_20_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_20_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_20_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_20_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_20_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_20_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_20_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_20_cast_fp16 = slice_update(begin = concat_78, begin_mask = model_model_kv_cache_0_internal_tensor_assign_20_begin_mask_0, end = concat_79, end_mask = model_model_kv_cache_0_internal_tensor_assign_20_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_20_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_20_stride_0, update = var_5837, x = coreml_update_state_74)[name = string("model_model_kv_cache_0_internal_tensor_assign_20_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_20_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_19_write_state")]; tensor coreml_update_state_75 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_19")]; tensor var_5987_begin_0 = const()[name = string("op_5987_begin_0"), val = tensor([9, 0, 0, 0])]; tensor var_5987_end_0 = const()[name = string("op_5987_end_0"), val = tensor([10, 8, 1024, 128])]; tensor var_5987_end_mask_0 = const()[name = string("op_5987_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_5987_cast_fp16 = slice_by_index(begin = var_5987_begin_0, end = var_5987_end_0, end_mask = var_5987_end_mask_0, x = coreml_update_state_75)[name = string("op_5987_cast_fp16")]; tensor K_layer_cache_19_axes_0 = const()[name = string("K_layer_cache_19_axes_0"), val = tensor([0])]; tensor K_layer_cache_19_cast_fp16 = squeeze(axes = K_layer_cache_19_axes_0, x = var_5987_cast_fp16)[name = string("K_layer_cache_19_cast_fp16")]; tensor var_5994_begin_0 = const()[name = string("op_5994_begin_0"), val = tensor([37, 0, 0, 0])]; tensor var_5994_end_0 = const()[name = string("op_5994_end_0"), val = tensor([38, 8, 1024, 128])]; tensor var_5994_end_mask_0 = const()[name = string("op_5994_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_5994_cast_fp16 = slice_by_index(begin = var_5994_begin_0, end = var_5994_end_0, end_mask = var_5994_end_mask_0, x = coreml_update_state_75)[name = string("op_5994_cast_fp16")]; tensor V_layer_cache_19_axes_0 = const()[name = string("V_layer_cache_19_axes_0"), val = tensor([0])]; tensor V_layer_cache_19_cast_fp16 = squeeze(axes = V_layer_cache_19_axes_0, x = var_5994_cast_fp16)[name = string("V_layer_cache_19_cast_fp16")]; tensor x_187_axes_0 = const()[name = string("x_187_axes_0"), val = tensor([1])]; tensor x_187_cast_fp16 = expand_dims(axes = x_187_axes_0, x = K_layer_cache_19_cast_fp16)[name = string("x_187_cast_fp16")]; tensor var_6031 = const()[name = string("op_6031"), val = tensor([1, 2, 1, 1])]; tensor x_189_cast_fp16 = tile(reps = var_6031, x = x_187_cast_fp16)[name = string("x_189_cast_fp16")]; tensor var_6043 = const()[name = string("op_6043"), val = tensor([1, -1, 1024, 128])]; tensor key_states_39_cast_fp16 = reshape(shape = var_6043, x = x_189_cast_fp16)[name = string("key_states_39_cast_fp16")]; tensor x_193_axes_0 = const()[name = string("x_193_axes_0"), val = tensor([1])]; tensor x_193_cast_fp16 = expand_dims(axes = x_193_axes_0, x = V_layer_cache_19_cast_fp16)[name = string("x_193_cast_fp16")]; tensor var_6051 = const()[name = string("op_6051"), val = tensor([1, 2, 1, 1])]; tensor x_195_cast_fp16 = tile(reps = var_6051, x = x_193_cast_fp16)[name = string("x_195_cast_fp16")]; tensor var_6063 = const()[name = string("op_6063"), val = tensor([1, -1, 1024, 128])]; tensor value_states_57_cast_fp16 = reshape(shape = var_6063, x = x_195_cast_fp16)[name = string("value_states_57_cast_fp16")]; bool var_6078_transpose_x_1 = const()[name = string("op_6078_transpose_x_1"), val = bool(false)]; bool var_6078_transpose_y_1 = const()[name = string("op_6078_transpose_y_1"), val = bool(true)]; tensor var_6078 = matmul(transpose_x = var_6078_transpose_x_1, transpose_y = var_6078_transpose_y_1, x = query_states_37, y = key_states_39_cast_fp16)[name = string("op_6078")]; fp16 var_6079_to_fp16 = const()[name = string("op_6079_to_fp16"), val = fp16(0x1.6ap-4)]; tensor attn_weights_55_cast_fp16 = mul(x = var_6078, y = var_6079_to_fp16)[name = string("attn_weights_55_cast_fp16")]; tensor attn_weights_57_cast_fp16 = add(x = attn_weights_55_cast_fp16, y = causal_mask)[name = string("attn_weights_57_cast_fp16")]; int32 var_6114 = const()[name = string("op_6114"), val = int32(-1)]; tensor attn_weights_59_cast_fp16 = softmax(axis = var_6114, x = attn_weights_57_cast_fp16)[name = string("attn_weights_59_cast_fp16")]; bool attn_output_91_transpose_x_0 = const()[name = string("attn_output_91_transpose_x_0"), val = bool(false)]; bool attn_output_91_transpose_y_0 = const()[name = string("attn_output_91_transpose_y_0"), val = bool(false)]; tensor attn_output_91_cast_fp16 = matmul(transpose_x = attn_output_91_transpose_x_0, transpose_y = attn_output_91_transpose_y_0, x = attn_weights_59_cast_fp16, y = value_states_57_cast_fp16)[name = string("attn_output_91_cast_fp16")]; tensor var_6125_perm_0 = const()[name = string("op_6125_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_6129 = const()[name = string("op_6129"), val = tensor([1, 1, 2048])]; tensor var_6125_cast_fp16 = transpose(perm = var_6125_perm_0, x = attn_output_91_cast_fp16)[name = string("transpose_112")]; tensor attn_output_95_cast_fp16 = reshape(shape = var_6129, x = var_6125_cast_fp16)[name = string("attn_output_95_cast_fp16")]; tensor var_6134 = const()[name = string("op_6134"), val = tensor([0, 2, 1])]; string var_6150_pad_type_0 = const()[name = string("op_6150_pad_type_0"), val = string("valid")]; int32 var_6150_groups_0 = const()[name = string("op_6150_groups_0"), val = int32(1)]; tensor var_6150_strides_0 = const()[name = string("op_6150_strides_0"), val = tensor([1])]; tensor var_6150_pad_0 = const()[name = string("op_6150_pad_0"), val = tensor([0, 0])]; tensor var_6150_dilations_0 = const()[name = string("op_6150_dilations_0"), val = tensor([1])]; tensor squeeze_9_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1039410944))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1042556736))))[name = string("squeeze_9_cast_fp16_to_fp32_to_fp16_palettized")]; tensor var_6135_cast_fp16 = transpose(perm = var_6134, x = attn_output_95_cast_fp16)[name = string("transpose_111")]; tensor var_6150_cast_fp16 = conv(dilations = var_6150_dilations_0, groups = var_6150_groups_0, pad = var_6150_pad_0, pad_type = var_6150_pad_type_0, strides = var_6150_strides_0, weight = squeeze_9_cast_fp16_to_fp32_to_fp16_palettized, x = var_6135_cast_fp16)[name = string("op_6150_cast_fp16")]; tensor var_6154 = const()[name = string("op_6154"), val = tensor([0, 2, 1])]; tensor attn_output_99_cast_fp16 = transpose(perm = var_6154, x = var_6150_cast_fp16)[name = string("transpose_110")]; tensor hidden_states_59_cast_fp16 = add(x = hidden_states_55_cast_fp16, y = attn_output_99_cast_fp16)[name = string("hidden_states_59_cast_fp16")]; tensor mean_79_axes_0 = const()[name = string("mean_79_axes_0"), val = tensor([-1])]; bool mean_79_keep_dims_0 = const()[name = string("mean_79_keep_dims_0"), val = bool(true)]; tensor mean_79_cast_fp16 = reduce_mean(axes = mean_79_axes_0, keep_dims = mean_79_keep_dims_0, x = hidden_states_59_cast_fp16)[name = string("mean_79_cast_fp16")]; tensor input_173_cast_fp16 = sub(x = hidden_states_59_cast_fp16, y = mean_79_cast_fp16)[name = string("input_173_cast_fp16")]; tensor var_6173_axes_0 = const()[name = string("op_6173_axes_0"), val = tensor([-1])]; tensor model_model_layers_9_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_9_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1042589568)))]; fp16 var_6161_to_fp16 = const()[name = string("op_6161_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_6173_cast_fp16 = layer_norm(axes = var_6173_axes_0, epsilon = var_6161_to_fp16, gamma = model_model_layers_9_post_attention_layernorm_weight_to_fp16, x = input_173_cast_fp16)[name = string("op_6173_cast_fp16")]; tensor var_6187 = const()[name = string("op_6187"), val = tensor([0, 2, 1])]; tensor input_175_axes_0 = const()[name = string("input_175_axes_0"), val = tensor([2])]; tensor var_6188 = transpose(perm = var_6187, x = var_6173_cast_fp16)[name = string("transpose_109")]; tensor input_175 = expand_dims(axes = input_175_axes_0, x = var_6188)[name = string("input_175")]; string input_177_pad_type_0 = const()[name = string("input_177_pad_type_0"), val = string("valid")]; tensor input_177_strides_0 = const()[name = string("input_177_strides_0"), val = tensor([1, 1])]; tensor input_177_pad_0 = const()[name = string("input_177_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_177_dilations_0 = const()[name = string("input_177_dilations_0"), val = tensor([1, 1])]; int32 input_177_groups_0 = const()[name = string("input_177_groups_0"), val = int32(1)]; tensor input_177 = conv(dilations = input_177_dilations_0, groups = input_177_groups_0, pad = input_177_pad_0, pad_type = input_177_pad_type_0, strides = input_177_strides_0, weight = model_model_layers_9_mlp_gate_proj_weight_palettized, x = input_175)[name = string("input_177")]; string b_19_pad_type_0 = const()[name = string("b_19_pad_type_0"), val = string("valid")]; tensor b_19_strides_0 = const()[name = string("b_19_strides_0"), val = tensor([1, 1])]; tensor b_19_pad_0 = const()[name = string("b_19_pad_0"), val = tensor([0, 0, 0, 0])]; tensor b_19_dilations_0 = const()[name = string("b_19_dilations_0"), val = tensor([1, 1])]; int32 b_19_groups_0 = const()[name = string("b_19_groups_0"), val = int32(1)]; tensor b_19 = conv(dilations = b_19_dilations_0, groups = b_19_groups_0, pad = b_19_pad_0, pad_type = b_19_pad_type_0, strides = b_19_strides_0, weight = model_model_layers_9_mlp_up_proj_weight_palettized, x = input_175)[name = string("b_19")]; tensor c_19 = silu(x = input_177)[name = string("c_19")]; tensor input_179 = mul(x = c_19, y = b_19)[name = string("input_179")]; string e_19_pad_type_0 = const()[name = string("e_19_pad_type_0"), val = string("valid")]; tensor e_19_strides_0 = const()[name = string("e_19_strides_0"), val = tensor([1, 1])]; tensor e_19_pad_0 = const()[name = string("e_19_pad_0"), val = tensor([0, 0, 0, 0])]; tensor e_19_dilations_0 = const()[name = string("e_19_dilations_0"), val = tensor([1, 1])]; int32 e_19_groups_0 = const()[name = string("e_19_groups_0"), val = int32(1)]; tensor e_19 = conv(dilations = e_19_dilations_0, groups = e_19_groups_0, pad = e_19_pad_0, pad_type = e_19_pad_type_0, strides = e_19_strides_0, weight = model_model_layers_9_mlp_down_proj_weight_palettized, x = input_179)[name = string("e_19")]; tensor var_6210_axes_0 = const()[name = string("op_6210_axes_0"), val = tensor([2])]; tensor var_6210 = squeeze(axes = var_6210_axes_0, x = e_19)[name = string("op_6210")]; tensor var_6211 = const()[name = string("op_6211"), val = tensor([0, 2, 1])]; tensor var_6212 = transpose(perm = var_6211, x = var_6210)[name = string("transpose_108")]; tensor hidden_states_61_cast_fp16 = add(x = hidden_states_59_cast_fp16, y = var_6212)[name = string("hidden_states_61_cast_fp16")]; tensor mean_81_axes_0 = const()[name = string("mean_81_axes_0"), val = tensor([-1])]; bool mean_81_keep_dims_0 = const()[name = string("mean_81_keep_dims_0"), val = bool(true)]; tensor mean_81_cast_fp16 = reduce_mean(axes = mean_81_axes_0, keep_dims = mean_81_keep_dims_0, x = hidden_states_61_cast_fp16)[name = string("mean_81_cast_fp16")]; tensor input_181_cast_fp16 = sub(x = hidden_states_61_cast_fp16, y = mean_81_cast_fp16)[name = string("input_181_cast_fp16")]; tensor var_6230_axes_0 = const()[name = string("op_6230_axes_0"), val = tensor([-1])]; tensor model_model_layers_10_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_10_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1042593728)))]; fp16 var_6218_to_fp16 = const()[name = string("op_6218_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_6230_cast_fp16 = layer_norm(axes = var_6230_axes_0, epsilon = var_6218_to_fp16, gamma = model_model_layers_10_input_layernorm_weight_to_fp16, x = input_181_cast_fp16)[name = string("op_6230_cast_fp16")]; tensor var_6236 = const()[name = string("op_6236"), val = tensor([0, 2, 1])]; tensor var_6239_axes_0 = const()[name = string("op_6239_axes_0"), val = tensor([2])]; tensor var_6237 = transpose(perm = var_6236, x = var_6230_cast_fp16)[name = string("transpose_107")]; tensor var_6239 = expand_dims(axes = var_6239_axes_0, x = var_6237)[name = string("op_6239")]; string var_6255_pad_type_0 = const()[name = string("op_6255_pad_type_0"), val = string("valid")]; tensor var_6255_strides_0 = const()[name = string("op_6255_strides_0"), val = tensor([1, 1])]; tensor var_6255_pad_0 = const()[name = string("op_6255_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_6255_dilations_0 = const()[name = string("op_6255_dilations_0"), val = tensor([1, 1])]; int32 var_6255_groups_0 = const()[name = string("op_6255_groups_0"), val = int32(1)]; tensor var_6255 = conv(dilations = var_6255_dilations_0, groups = var_6255_groups_0, pad = var_6255_pad_0, pad_type = var_6255_pad_type_0, strides = var_6255_strides_0, weight = model_model_layers_10_self_attn_q_proj_weight_palettized, x = var_6239)[name = string("op_6255")]; tensor var_6260 = const()[name = string("op_6260"), val = tensor([1, 16, 1, 128])]; tensor var_6261 = reshape(shape = var_6260, x = var_6255)[name = string("op_6261")]; string var_6277_pad_type_0 = const()[name = string("op_6277_pad_type_0"), val = string("valid")]; tensor var_6277_strides_0 = const()[name = string("op_6277_strides_0"), val = tensor([1, 1])]; tensor var_6277_pad_0 = const()[name = string("op_6277_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_6277_dilations_0 = const()[name = string("op_6277_dilations_0"), val = tensor([1, 1])]; int32 var_6277_groups_0 = const()[name = string("op_6277_groups_0"), val = int32(1)]; tensor var_6277 = conv(dilations = var_6277_dilations_0, groups = var_6277_groups_0, pad = var_6277_pad_0, pad_type = var_6277_pad_type_0, strides = var_6277_strides_0, weight = model_model_layers_10_self_attn_k_proj_weight_palettized, x = var_6239)[name = string("op_6277")]; tensor var_6282 = const()[name = string("op_6282"), val = tensor([1, 8, 1, 128])]; tensor var_6283 = reshape(shape = var_6282, x = var_6277)[name = string("op_6283")]; string var_6299_pad_type_0 = const()[name = string("op_6299_pad_type_0"), val = string("valid")]; tensor var_6299_strides_0 = const()[name = string("op_6299_strides_0"), val = tensor([1, 1])]; tensor var_6299_pad_0 = const()[name = string("op_6299_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_6299_dilations_0 = const()[name = string("op_6299_dilations_0"), val = tensor([1, 1])]; int32 var_6299_groups_0 = const()[name = string("op_6299_groups_0"), val = int32(1)]; tensor var_6299 = conv(dilations = var_6299_dilations_0, groups = var_6299_groups_0, pad = var_6299_pad_0, pad_type = var_6299_pad_type_0, strides = var_6299_strides_0, weight = model_model_layers_10_self_attn_v_proj_weight_palettized, x = var_6239)[name = string("op_6299")]; tensor var_6304 = const()[name = string("op_6304"), val = tensor([1, 8, 1, 128])]; tensor var_6305 = reshape(shape = var_6304, x = var_6299)[name = string("op_6305")]; tensor mean_83_axes_0 = const()[name = string("mean_83_axes_0"), val = tensor([-1])]; bool mean_83_keep_dims_0 = const()[name = string("mean_83_keep_dims_0"), val = bool(true)]; tensor mean_83 = reduce_mean(axes = mean_83_axes_0, keep_dims = mean_83_keep_dims_0, x = var_6261)[name = string("mean_83")]; tensor input_185 = sub(x = var_6261, y = mean_83)[name = string("input_185")]; tensor var_6326_axes_0 = const()[name = string("op_6326_axes_0"), val = tensor([-1])]; tensor model_model_layers_10_self_attn_q_norm_weight_to_fp16 = const()[name = string("model_model_layers_10_self_attn_q_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1042597888)))]; fp16 var_6314_to_fp16 = const()[name = string("op_6314_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_6326_cast_fp16 = layer_norm(axes = var_6326_axes_0, epsilon = var_6314_to_fp16, gamma = model_model_layers_10_self_attn_q_norm_weight_to_fp16, x = input_185)[name = string("op_6326_cast_fp16")]; tensor mean_85_axes_0 = const()[name = string("mean_85_axes_0"), val = tensor([-1])]; bool mean_85_keep_dims_0 = const()[name = string("mean_85_keep_dims_0"), val = bool(true)]; tensor mean_85 = reduce_mean(axes = mean_85_axes_0, keep_dims = mean_85_keep_dims_0, x = var_6283)[name = string("mean_85")]; tensor input_187 = sub(x = var_6283, y = mean_85)[name = string("input_187")]; tensor var_6344_axes_0 = const()[name = string("op_6344_axes_0"), val = tensor([-1])]; tensor model_model_layers_10_self_attn_k_norm_weight_to_fp16 = const()[name = string("model_model_layers_10_self_attn_k_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1042598208)))]; fp16 var_6332_to_fp16 = const()[name = string("op_6332_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_6344_cast_fp16 = layer_norm(axes = var_6344_axes_0, epsilon = var_6332_to_fp16, gamma = model_model_layers_10_self_attn_k_norm_weight_to_fp16, x = input_187)[name = string("op_6344_cast_fp16")]; tensor var_6347 = mul(x = var_6326_cast_fp16, y = cos_1_cast_fp16)[name = string("op_6347")]; tensor x1_41_begin_0 = const()[name = string("x1_41_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_41_end_0 = const()[name = string("x1_41_end_0"), val = tensor([1, 16, 1, 64])]; tensor x1_41_end_mask_0 = const()[name = string("x1_41_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_41 = slice_by_index(begin = x1_41_begin_0, end = x1_41_end_0, end_mask = x1_41_end_mask_0, x = var_6326_cast_fp16)[name = string("x1_41")]; tensor x2_41_begin_0 = const()[name = string("x2_41_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_41_end_0 = const()[name = string("x2_41_end_0"), val = tensor([1, 16, 1, 128])]; tensor x2_41_end_mask_0 = const()[name = string("x2_41_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_41 = slice_by_index(begin = x2_41_begin_0, end = x2_41_end_0, end_mask = x2_41_end_mask_0, x = var_6326_cast_fp16)[name = string("x2_41")]; fp16 const_185_promoted = const()[name = string("const_185_promoted"), val = fp16(-0x1p+0)]; tensor var_6368 = mul(x = x2_41, y = const_185_promoted)[name = string("op_6368")]; int32 var_6370 = const()[name = string("op_6370"), val = int32(-1)]; bool var_6371_interleave_0 = const()[name = string("op_6371_interleave_0"), val = bool(false)]; tensor var_6371 = concat(axis = var_6370, interleave = var_6371_interleave_0, values = (var_6368, x1_41))[name = string("op_6371")]; tensor var_6372 = mul(x = var_6371, y = sin_1_cast_fp16)[name = string("op_6372")]; tensor query_states_41 = add(x = var_6347, y = var_6372)[name = string("query_states_41")]; tensor var_6375 = mul(x = var_6344_cast_fp16, y = cos_1_cast_fp16)[name = string("op_6375")]; tensor x1_43_begin_0 = const()[name = string("x1_43_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_43_end_0 = const()[name = string("x1_43_end_0"), val = tensor([1, 8, 1, 64])]; tensor x1_43_end_mask_0 = const()[name = string("x1_43_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_43 = slice_by_index(begin = x1_43_begin_0, end = x1_43_end_0, end_mask = x1_43_end_mask_0, x = var_6344_cast_fp16)[name = string("x1_43")]; tensor x2_43_begin_0 = const()[name = string("x2_43_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_43_end_0 = const()[name = string("x2_43_end_0"), val = tensor([1, 8, 1, 128])]; tensor x2_43_end_mask_0 = const()[name = string("x2_43_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_43 = slice_by_index(begin = x2_43_begin_0, end = x2_43_end_0, end_mask = x2_43_end_mask_0, x = var_6344_cast_fp16)[name = string("x2_43")]; fp16 const_188_promoted = const()[name = string("const_188_promoted"), val = fp16(-0x1p+0)]; tensor var_6396 = mul(x = x2_43, y = const_188_promoted)[name = string("op_6396")]; int32 var_6398 = const()[name = string("op_6398"), val = int32(-1)]; bool var_6399_interleave_0 = const()[name = string("op_6399_interleave_0"), val = bool(false)]; tensor var_6399 = concat(axis = var_6398, interleave = var_6399_interleave_0, values = (var_6396, x1_43))[name = string("op_6399")]; tensor var_6400 = mul(x = var_6399, y = sin_1_cast_fp16)[name = string("op_6400")]; tensor key_states_41 = add(x = var_6375, y = var_6400)[name = string("key_states_41")]; tensor expand_dims_120 = const()[name = string("expand_dims_120"), val = tensor([10])]; tensor expand_dims_121 = const()[name = string("expand_dims_121"), val = tensor([0])]; tensor expand_dims_123 = const()[name = string("expand_dims_123"), val = tensor([0])]; tensor expand_dims_124 = const()[name = string("expand_dims_124"), val = tensor([11])]; int32 concat_82_axis_0 = const()[name = string("concat_82_axis_0"), val = int32(0)]; bool concat_82_interleave_0 = const()[name = string("concat_82_interleave_0"), val = bool(false)]; tensor concat_82 = concat(axis = concat_82_axis_0, interleave = concat_82_interleave_0, values = (expand_dims_120, expand_dims_121, current_pos, expand_dims_123))[name = string("concat_82")]; tensor concat_83_values1_0 = const()[name = string("concat_83_values1_0"), val = tensor([0])]; tensor concat_83_values3_0 = const()[name = string("concat_83_values3_0"), val = tensor([0])]; int32 concat_83_axis_0 = const()[name = string("concat_83_axis_0"), val = int32(0)]; bool concat_83_interleave_0 = const()[name = string("concat_83_interleave_0"), val = bool(false)]; tensor concat_83 = concat(axis = concat_83_axis_0, interleave = concat_83_interleave_0, values = (expand_dims_124, concat_83_values1_0, var_1725, concat_83_values3_0))[name = string("concat_83")]; tensor model_model_kv_cache_0_internal_tensor_assign_21_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_21_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_21_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_21_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_21_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_21_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_21_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_21_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_21_cast_fp16 = slice_update(begin = concat_82, begin_mask = model_model_kv_cache_0_internal_tensor_assign_21_begin_mask_0, end = concat_83, end_mask = model_model_kv_cache_0_internal_tensor_assign_21_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_21_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_21_stride_0, update = key_states_41, x = coreml_update_state_75)[name = string("model_model_kv_cache_0_internal_tensor_assign_21_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_21_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_20_write_state")]; tensor coreml_update_state_76 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_20")]; tensor expand_dims_126 = const()[name = string("expand_dims_126"), val = tensor([38])]; tensor expand_dims_127 = const()[name = string("expand_dims_127"), val = tensor([0])]; tensor expand_dims_129 = const()[name = string("expand_dims_129"), val = tensor([0])]; tensor expand_dims_130 = const()[name = string("expand_dims_130"), val = tensor([39])]; int32 concat_86_axis_0 = const()[name = string("concat_86_axis_0"), val = int32(0)]; bool concat_86_interleave_0 = const()[name = string("concat_86_interleave_0"), val = bool(false)]; tensor concat_86 = concat(axis = concat_86_axis_0, interleave = concat_86_interleave_0, values = (expand_dims_126, expand_dims_127, current_pos, expand_dims_129))[name = string("concat_86")]; tensor concat_87_values1_0 = const()[name = string("concat_87_values1_0"), val = tensor([0])]; tensor concat_87_values3_0 = const()[name = string("concat_87_values3_0"), val = tensor([0])]; int32 concat_87_axis_0 = const()[name = string("concat_87_axis_0"), val = int32(0)]; bool concat_87_interleave_0 = const()[name = string("concat_87_interleave_0"), val = bool(false)]; tensor concat_87 = concat(axis = concat_87_axis_0, interleave = concat_87_interleave_0, values = (expand_dims_130, concat_87_values1_0, var_1725, concat_87_values3_0))[name = string("concat_87")]; tensor model_model_kv_cache_0_internal_tensor_assign_22_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_22_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_22_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_22_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_22_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_22_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_22_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_22_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_22_cast_fp16 = slice_update(begin = concat_86, begin_mask = model_model_kv_cache_0_internal_tensor_assign_22_begin_mask_0, end = concat_87, end_mask = model_model_kv_cache_0_internal_tensor_assign_22_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_22_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_22_stride_0, update = var_6305, x = coreml_update_state_76)[name = string("model_model_kv_cache_0_internal_tensor_assign_22_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_22_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_21_write_state")]; tensor coreml_update_state_77 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_21")]; tensor var_6455_begin_0 = const()[name = string("op_6455_begin_0"), val = tensor([10, 0, 0, 0])]; tensor var_6455_end_0 = const()[name = string("op_6455_end_0"), val = tensor([11, 8, 1024, 128])]; tensor var_6455_end_mask_0 = const()[name = string("op_6455_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_6455_cast_fp16 = slice_by_index(begin = var_6455_begin_0, end = var_6455_end_0, end_mask = var_6455_end_mask_0, x = coreml_update_state_77)[name = string("op_6455_cast_fp16")]; tensor K_layer_cache_21_axes_0 = const()[name = string("K_layer_cache_21_axes_0"), val = tensor([0])]; tensor K_layer_cache_21_cast_fp16 = squeeze(axes = K_layer_cache_21_axes_0, x = var_6455_cast_fp16)[name = string("K_layer_cache_21_cast_fp16")]; tensor var_6462_begin_0 = const()[name = string("op_6462_begin_0"), val = tensor([38, 0, 0, 0])]; tensor var_6462_end_0 = const()[name = string("op_6462_end_0"), val = tensor([39, 8, 1024, 128])]; tensor var_6462_end_mask_0 = const()[name = string("op_6462_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_6462_cast_fp16 = slice_by_index(begin = var_6462_begin_0, end = var_6462_end_0, end_mask = var_6462_end_mask_0, x = coreml_update_state_77)[name = string("op_6462_cast_fp16")]; tensor V_layer_cache_21_axes_0 = const()[name = string("V_layer_cache_21_axes_0"), val = tensor([0])]; tensor V_layer_cache_21_cast_fp16 = squeeze(axes = V_layer_cache_21_axes_0, x = var_6462_cast_fp16)[name = string("V_layer_cache_21_cast_fp16")]; tensor x_207_axes_0 = const()[name = string("x_207_axes_0"), val = tensor([1])]; tensor x_207_cast_fp16 = expand_dims(axes = x_207_axes_0, x = K_layer_cache_21_cast_fp16)[name = string("x_207_cast_fp16")]; tensor var_6499 = const()[name = string("op_6499"), val = tensor([1, 2, 1, 1])]; tensor x_209_cast_fp16 = tile(reps = var_6499, x = x_207_cast_fp16)[name = string("x_209_cast_fp16")]; tensor var_6511 = const()[name = string("op_6511"), val = tensor([1, -1, 1024, 128])]; tensor key_states_43_cast_fp16 = reshape(shape = var_6511, x = x_209_cast_fp16)[name = string("key_states_43_cast_fp16")]; tensor x_213_axes_0 = const()[name = string("x_213_axes_0"), val = tensor([1])]; tensor x_213_cast_fp16 = expand_dims(axes = x_213_axes_0, x = V_layer_cache_21_cast_fp16)[name = string("x_213_cast_fp16")]; tensor var_6519 = const()[name = string("op_6519"), val = tensor([1, 2, 1, 1])]; tensor x_215_cast_fp16 = tile(reps = var_6519, x = x_213_cast_fp16)[name = string("x_215_cast_fp16")]; tensor var_6531 = const()[name = string("op_6531"), val = tensor([1, -1, 1024, 128])]; tensor value_states_63_cast_fp16 = reshape(shape = var_6531, x = x_215_cast_fp16)[name = string("value_states_63_cast_fp16")]; bool var_6546_transpose_x_1 = const()[name = string("op_6546_transpose_x_1"), val = bool(false)]; bool var_6546_transpose_y_1 = const()[name = string("op_6546_transpose_y_1"), val = bool(true)]; tensor var_6546 = matmul(transpose_x = var_6546_transpose_x_1, transpose_y = var_6546_transpose_y_1, x = query_states_41, y = key_states_43_cast_fp16)[name = string("op_6546")]; fp16 var_6547_to_fp16 = const()[name = string("op_6547_to_fp16"), val = fp16(0x1.6ap-4)]; tensor attn_weights_61_cast_fp16 = mul(x = var_6546, y = var_6547_to_fp16)[name = string("attn_weights_61_cast_fp16")]; tensor attn_weights_63_cast_fp16 = add(x = attn_weights_61_cast_fp16, y = causal_mask)[name = string("attn_weights_63_cast_fp16")]; int32 var_6582 = const()[name = string("op_6582"), val = int32(-1)]; tensor attn_weights_65_cast_fp16 = softmax(axis = var_6582, x = attn_weights_63_cast_fp16)[name = string("attn_weights_65_cast_fp16")]; bool attn_output_101_transpose_x_0 = const()[name = string("attn_output_101_transpose_x_0"), val = bool(false)]; bool attn_output_101_transpose_y_0 = const()[name = string("attn_output_101_transpose_y_0"), val = bool(false)]; tensor attn_output_101_cast_fp16 = matmul(transpose_x = attn_output_101_transpose_x_0, transpose_y = attn_output_101_transpose_y_0, x = attn_weights_65_cast_fp16, y = value_states_63_cast_fp16)[name = string("attn_output_101_cast_fp16")]; tensor var_6593_perm_0 = const()[name = string("op_6593_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_6597 = const()[name = string("op_6597"), val = tensor([1, 1, 2048])]; tensor var_6593_cast_fp16 = transpose(perm = var_6593_perm_0, x = attn_output_101_cast_fp16)[name = string("transpose_106")]; tensor attn_output_105_cast_fp16 = reshape(shape = var_6597, x = var_6593_cast_fp16)[name = string("attn_output_105_cast_fp16")]; tensor var_6602 = const()[name = string("op_6602"), val = tensor([0, 2, 1])]; string var_6618_pad_type_0 = const()[name = string("op_6618_pad_type_0"), val = string("valid")]; int32 var_6618_groups_0 = const()[name = string("op_6618_groups_0"), val = int32(1)]; tensor var_6618_strides_0 = const()[name = string("op_6618_strides_0"), val = tensor([1])]; tensor var_6618_pad_0 = const()[name = string("op_6618_pad_0"), val = tensor([0, 0])]; tensor var_6618_dilations_0 = const()[name = string("op_6618_dilations_0"), val = tensor([1])]; tensor squeeze_10_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1042598528))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1045744320))))[name = string("squeeze_10_cast_fp16_to_fp32_to_fp16_palettized")]; tensor var_6603_cast_fp16 = transpose(perm = var_6602, x = attn_output_105_cast_fp16)[name = string("transpose_105")]; tensor var_6618_cast_fp16 = conv(dilations = var_6618_dilations_0, groups = var_6618_groups_0, pad = var_6618_pad_0, pad_type = var_6618_pad_type_0, strides = var_6618_strides_0, weight = squeeze_10_cast_fp16_to_fp32_to_fp16_palettized, x = var_6603_cast_fp16)[name = string("op_6618_cast_fp16")]; tensor var_6622 = const()[name = string("op_6622"), val = tensor([0, 2, 1])]; tensor attn_output_109_cast_fp16 = transpose(perm = var_6622, x = var_6618_cast_fp16)[name = string("transpose_104")]; tensor hidden_states_65_cast_fp16 = add(x = hidden_states_61_cast_fp16, y = attn_output_109_cast_fp16)[name = string("hidden_states_65_cast_fp16")]; tensor mean_87_axes_0 = const()[name = string("mean_87_axes_0"), val = tensor([-1])]; bool mean_87_keep_dims_0 = const()[name = string("mean_87_keep_dims_0"), val = bool(true)]; tensor mean_87_cast_fp16 = reduce_mean(axes = mean_87_axes_0, keep_dims = mean_87_keep_dims_0, x = hidden_states_65_cast_fp16)[name = string("mean_87_cast_fp16")]; tensor input_191_cast_fp16 = sub(x = hidden_states_65_cast_fp16, y = mean_87_cast_fp16)[name = string("input_191_cast_fp16")]; tensor var_6641_axes_0 = const()[name = string("op_6641_axes_0"), val = tensor([-1])]; tensor model_model_layers_10_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_10_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1045777152)))]; fp16 var_6629_to_fp16 = const()[name = string("op_6629_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_6641_cast_fp16 = layer_norm(axes = var_6641_axes_0, epsilon = var_6629_to_fp16, gamma = model_model_layers_10_post_attention_layernorm_weight_to_fp16, x = input_191_cast_fp16)[name = string("op_6641_cast_fp16")]; tensor var_6655 = const()[name = string("op_6655"), val = tensor([0, 2, 1])]; tensor input_193_axes_0 = const()[name = string("input_193_axes_0"), val = tensor([2])]; tensor var_6656 = transpose(perm = var_6655, x = var_6641_cast_fp16)[name = string("transpose_103")]; tensor input_193 = expand_dims(axes = input_193_axes_0, x = var_6656)[name = string("input_193")]; string input_195_pad_type_0 = const()[name = string("input_195_pad_type_0"), val = string("valid")]; tensor input_195_strides_0 = const()[name = string("input_195_strides_0"), val = tensor([1, 1])]; tensor input_195_pad_0 = const()[name = string("input_195_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_195_dilations_0 = const()[name = string("input_195_dilations_0"), val = tensor([1, 1])]; int32 input_195_groups_0 = const()[name = string("input_195_groups_0"), val = int32(1)]; tensor input_195 = conv(dilations = input_195_dilations_0, groups = input_195_groups_0, pad = input_195_pad_0, pad_type = input_195_pad_type_0, strides = input_195_strides_0, weight = model_model_layers_10_mlp_gate_proj_weight_palettized, x = input_193)[name = string("input_195")]; string b_21_pad_type_0 = const()[name = string("b_21_pad_type_0"), val = string("valid")]; tensor b_21_strides_0 = const()[name = string("b_21_strides_0"), val = tensor([1, 1])]; tensor b_21_pad_0 = const()[name = string("b_21_pad_0"), val = tensor([0, 0, 0, 0])]; tensor b_21_dilations_0 = const()[name = string("b_21_dilations_0"), val = tensor([1, 1])]; int32 b_21_groups_0 = const()[name = string("b_21_groups_0"), val = int32(1)]; tensor b_21 = conv(dilations = b_21_dilations_0, groups = b_21_groups_0, pad = b_21_pad_0, pad_type = b_21_pad_type_0, strides = b_21_strides_0, weight = model_model_layers_10_mlp_up_proj_weight_palettized, x = input_193)[name = string("b_21")]; tensor c_21 = silu(x = input_195)[name = string("c_21")]; tensor input_197 = mul(x = c_21, y = b_21)[name = string("input_197")]; string e_21_pad_type_0 = const()[name = string("e_21_pad_type_0"), val = string("valid")]; tensor e_21_strides_0 = const()[name = string("e_21_strides_0"), val = tensor([1, 1])]; tensor e_21_pad_0 = const()[name = string("e_21_pad_0"), val = tensor([0, 0, 0, 0])]; tensor e_21_dilations_0 = const()[name = string("e_21_dilations_0"), val = tensor([1, 1])]; int32 e_21_groups_0 = const()[name = string("e_21_groups_0"), val = int32(1)]; tensor e_21 = conv(dilations = e_21_dilations_0, groups = e_21_groups_0, pad = e_21_pad_0, pad_type = e_21_pad_type_0, strides = e_21_strides_0, weight = model_model_layers_10_mlp_down_proj_weight_palettized, x = input_197)[name = string("e_21")]; tensor var_6678_axes_0 = const()[name = string("op_6678_axes_0"), val = tensor([2])]; tensor var_6678 = squeeze(axes = var_6678_axes_0, x = e_21)[name = string("op_6678")]; tensor var_6679 = const()[name = string("op_6679"), val = tensor([0, 2, 1])]; tensor var_6680 = transpose(perm = var_6679, x = var_6678)[name = string("transpose_102")]; tensor hidden_states_67_cast_fp16 = add(x = hidden_states_65_cast_fp16, y = var_6680)[name = string("hidden_states_67_cast_fp16")]; tensor mean_89_axes_0 = const()[name = string("mean_89_axes_0"), val = tensor([-1])]; bool mean_89_keep_dims_0 = const()[name = string("mean_89_keep_dims_0"), val = bool(true)]; tensor mean_89_cast_fp16 = reduce_mean(axes = mean_89_axes_0, keep_dims = mean_89_keep_dims_0, x = hidden_states_67_cast_fp16)[name = string("mean_89_cast_fp16")]; tensor input_199_cast_fp16 = sub(x = hidden_states_67_cast_fp16, y = mean_89_cast_fp16)[name = string("input_199_cast_fp16")]; tensor var_6698_axes_0 = const()[name = string("op_6698_axes_0"), val = tensor([-1])]; tensor model_model_layers_11_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_11_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1045781312)))]; fp16 var_6686_to_fp16 = const()[name = string("op_6686_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_6698_cast_fp16 = layer_norm(axes = var_6698_axes_0, epsilon = var_6686_to_fp16, gamma = model_model_layers_11_input_layernorm_weight_to_fp16, x = input_199_cast_fp16)[name = string("op_6698_cast_fp16")]; tensor var_6704 = const()[name = string("op_6704"), val = tensor([0, 2, 1])]; tensor var_6707_axes_0 = const()[name = string("op_6707_axes_0"), val = tensor([2])]; tensor var_6705 = transpose(perm = var_6704, x = var_6698_cast_fp16)[name = string("transpose_101")]; tensor var_6707 = expand_dims(axes = var_6707_axes_0, x = var_6705)[name = string("op_6707")]; string var_6723_pad_type_0 = const()[name = string("op_6723_pad_type_0"), val = string("valid")]; tensor var_6723_strides_0 = const()[name = string("op_6723_strides_0"), val = tensor([1, 1])]; tensor var_6723_pad_0 = const()[name = string("op_6723_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_6723_dilations_0 = const()[name = string("op_6723_dilations_0"), val = tensor([1, 1])]; int32 var_6723_groups_0 = const()[name = string("op_6723_groups_0"), val = int32(1)]; tensor var_6723 = conv(dilations = var_6723_dilations_0, groups = var_6723_groups_0, pad = var_6723_pad_0, pad_type = var_6723_pad_type_0, strides = var_6723_strides_0, weight = model_model_layers_11_self_attn_q_proj_weight_palettized, x = var_6707)[name = string("op_6723")]; tensor var_6728 = const()[name = string("op_6728"), val = tensor([1, 16, 1, 128])]; tensor var_6729 = reshape(shape = var_6728, x = var_6723)[name = string("op_6729")]; string var_6745_pad_type_0 = const()[name = string("op_6745_pad_type_0"), val = string("valid")]; tensor var_6745_strides_0 = const()[name = string("op_6745_strides_0"), val = tensor([1, 1])]; tensor var_6745_pad_0 = const()[name = string("op_6745_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_6745_dilations_0 = const()[name = string("op_6745_dilations_0"), val = tensor([1, 1])]; int32 var_6745_groups_0 = const()[name = string("op_6745_groups_0"), val = int32(1)]; tensor var_6745 = conv(dilations = var_6745_dilations_0, groups = var_6745_groups_0, pad = var_6745_pad_0, pad_type = var_6745_pad_type_0, strides = var_6745_strides_0, weight = model_model_layers_11_self_attn_k_proj_weight_palettized, x = var_6707)[name = string("op_6745")]; tensor var_6750 = const()[name = string("op_6750"), val = tensor([1, 8, 1, 128])]; tensor var_6751 = reshape(shape = var_6750, x = var_6745)[name = string("op_6751")]; string var_6767_pad_type_0 = const()[name = string("op_6767_pad_type_0"), val = string("valid")]; tensor var_6767_strides_0 = const()[name = string("op_6767_strides_0"), val = tensor([1, 1])]; tensor var_6767_pad_0 = const()[name = string("op_6767_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_6767_dilations_0 = const()[name = string("op_6767_dilations_0"), val = tensor([1, 1])]; int32 var_6767_groups_0 = const()[name = string("op_6767_groups_0"), val = int32(1)]; tensor var_6767 = conv(dilations = var_6767_dilations_0, groups = var_6767_groups_0, pad = var_6767_pad_0, pad_type = var_6767_pad_type_0, strides = var_6767_strides_0, weight = model_model_layers_11_self_attn_v_proj_weight_palettized, x = var_6707)[name = string("op_6767")]; tensor var_6772 = const()[name = string("op_6772"), val = tensor([1, 8, 1, 128])]; tensor var_6773 = reshape(shape = var_6772, x = var_6767)[name = string("op_6773")]; tensor mean_91_axes_0 = const()[name = string("mean_91_axes_0"), val = tensor([-1])]; bool mean_91_keep_dims_0 = const()[name = string("mean_91_keep_dims_0"), val = bool(true)]; tensor mean_91 = reduce_mean(axes = mean_91_axes_0, keep_dims = mean_91_keep_dims_0, x = var_6729)[name = string("mean_91")]; tensor input_203 = sub(x = var_6729, y = mean_91)[name = string("input_203")]; tensor var_6794_axes_0 = const()[name = string("op_6794_axes_0"), val = tensor([-1])]; tensor model_model_layers_11_self_attn_q_norm_weight_to_fp16 = const()[name = string("model_model_layers_11_self_attn_q_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1045785472)))]; fp16 var_6782_to_fp16 = const()[name = string("op_6782_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_6794_cast_fp16 = layer_norm(axes = var_6794_axes_0, epsilon = var_6782_to_fp16, gamma = model_model_layers_11_self_attn_q_norm_weight_to_fp16, x = input_203)[name = string("op_6794_cast_fp16")]; tensor mean_93_axes_0 = const()[name = string("mean_93_axes_0"), val = tensor([-1])]; bool mean_93_keep_dims_0 = const()[name = string("mean_93_keep_dims_0"), val = bool(true)]; tensor mean_93 = reduce_mean(axes = mean_93_axes_0, keep_dims = mean_93_keep_dims_0, x = var_6751)[name = string("mean_93")]; tensor input_205 = sub(x = var_6751, y = mean_93)[name = string("input_205")]; tensor var_6812_axes_0 = const()[name = string("op_6812_axes_0"), val = tensor([-1])]; tensor model_model_layers_11_self_attn_k_norm_weight_to_fp16 = const()[name = string("model_model_layers_11_self_attn_k_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1045785792)))]; fp16 var_6800_to_fp16 = const()[name = string("op_6800_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_6812_cast_fp16 = layer_norm(axes = var_6812_axes_0, epsilon = var_6800_to_fp16, gamma = model_model_layers_11_self_attn_k_norm_weight_to_fp16, x = input_205)[name = string("op_6812_cast_fp16")]; tensor var_6815 = mul(x = var_6794_cast_fp16, y = cos_1_cast_fp16)[name = string("op_6815")]; tensor x1_45_begin_0 = const()[name = string("x1_45_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_45_end_0 = const()[name = string("x1_45_end_0"), val = tensor([1, 16, 1, 64])]; tensor x1_45_end_mask_0 = const()[name = string("x1_45_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_45 = slice_by_index(begin = x1_45_begin_0, end = x1_45_end_0, end_mask = x1_45_end_mask_0, x = var_6794_cast_fp16)[name = string("x1_45")]; tensor x2_45_begin_0 = const()[name = string("x2_45_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_45_end_0 = const()[name = string("x2_45_end_0"), val = tensor([1, 16, 1, 128])]; tensor x2_45_end_mask_0 = const()[name = string("x2_45_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_45 = slice_by_index(begin = x2_45_begin_0, end = x2_45_end_0, end_mask = x2_45_end_mask_0, x = var_6794_cast_fp16)[name = string("x2_45")]; fp16 const_203_promoted = const()[name = string("const_203_promoted"), val = fp16(-0x1p+0)]; tensor var_6836 = mul(x = x2_45, y = const_203_promoted)[name = string("op_6836")]; int32 var_6838 = const()[name = string("op_6838"), val = int32(-1)]; bool var_6839_interleave_0 = const()[name = string("op_6839_interleave_0"), val = bool(false)]; tensor var_6839 = concat(axis = var_6838, interleave = var_6839_interleave_0, values = (var_6836, x1_45))[name = string("op_6839")]; tensor var_6840 = mul(x = var_6839, y = sin_1_cast_fp16)[name = string("op_6840")]; tensor query_states_45 = add(x = var_6815, y = var_6840)[name = string("query_states_45")]; tensor var_6843 = mul(x = var_6812_cast_fp16, y = cos_1_cast_fp16)[name = string("op_6843")]; tensor x1_47_begin_0 = const()[name = string("x1_47_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_47_end_0 = const()[name = string("x1_47_end_0"), val = tensor([1, 8, 1, 64])]; tensor x1_47_end_mask_0 = const()[name = string("x1_47_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_47 = slice_by_index(begin = x1_47_begin_0, end = x1_47_end_0, end_mask = x1_47_end_mask_0, x = var_6812_cast_fp16)[name = string("x1_47")]; tensor x2_47_begin_0 = const()[name = string("x2_47_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_47_end_0 = const()[name = string("x2_47_end_0"), val = tensor([1, 8, 1, 128])]; tensor x2_47_end_mask_0 = const()[name = string("x2_47_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_47 = slice_by_index(begin = x2_47_begin_0, end = x2_47_end_0, end_mask = x2_47_end_mask_0, x = var_6812_cast_fp16)[name = string("x2_47")]; fp16 const_206_promoted = const()[name = string("const_206_promoted"), val = fp16(-0x1p+0)]; tensor var_6864 = mul(x = x2_47, y = const_206_promoted)[name = string("op_6864")]; int32 var_6866 = const()[name = string("op_6866"), val = int32(-1)]; bool var_6867_interleave_0 = const()[name = string("op_6867_interleave_0"), val = bool(false)]; tensor var_6867 = concat(axis = var_6866, interleave = var_6867_interleave_0, values = (var_6864, x1_47))[name = string("op_6867")]; tensor var_6868 = mul(x = var_6867, y = sin_1_cast_fp16)[name = string("op_6868")]; tensor key_states_45 = add(x = var_6843, y = var_6868)[name = string("key_states_45")]; tensor expand_dims_132 = const()[name = string("expand_dims_132"), val = tensor([11])]; tensor expand_dims_133 = const()[name = string("expand_dims_133"), val = tensor([0])]; tensor expand_dims_135 = const()[name = string("expand_dims_135"), val = tensor([0])]; tensor expand_dims_136 = const()[name = string("expand_dims_136"), val = tensor([12])]; int32 concat_90_axis_0 = const()[name = string("concat_90_axis_0"), val = int32(0)]; bool concat_90_interleave_0 = const()[name = string("concat_90_interleave_0"), val = bool(false)]; tensor concat_90 = concat(axis = concat_90_axis_0, interleave = concat_90_interleave_0, values = (expand_dims_132, expand_dims_133, current_pos, expand_dims_135))[name = string("concat_90")]; tensor concat_91_values1_0 = const()[name = string("concat_91_values1_0"), val = tensor([0])]; tensor concat_91_values3_0 = const()[name = string("concat_91_values3_0"), val = tensor([0])]; int32 concat_91_axis_0 = const()[name = string("concat_91_axis_0"), val = int32(0)]; bool concat_91_interleave_0 = const()[name = string("concat_91_interleave_0"), val = bool(false)]; tensor concat_91 = concat(axis = concat_91_axis_0, interleave = concat_91_interleave_0, values = (expand_dims_136, concat_91_values1_0, var_1725, concat_91_values3_0))[name = string("concat_91")]; tensor model_model_kv_cache_0_internal_tensor_assign_23_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_23_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_23_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_23_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_23_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_23_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_23_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_23_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_23_cast_fp16 = slice_update(begin = concat_90, begin_mask = model_model_kv_cache_0_internal_tensor_assign_23_begin_mask_0, end = concat_91, end_mask = model_model_kv_cache_0_internal_tensor_assign_23_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_23_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_23_stride_0, update = key_states_45, x = coreml_update_state_77)[name = string("model_model_kv_cache_0_internal_tensor_assign_23_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_23_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_22_write_state")]; tensor coreml_update_state_78 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_22")]; tensor expand_dims_138 = const()[name = string("expand_dims_138"), val = tensor([39])]; tensor expand_dims_139 = const()[name = string("expand_dims_139"), val = tensor([0])]; tensor expand_dims_141 = const()[name = string("expand_dims_141"), val = tensor([0])]; tensor expand_dims_142 = const()[name = string("expand_dims_142"), val = tensor([40])]; int32 concat_94_axis_0 = const()[name = string("concat_94_axis_0"), val = int32(0)]; bool concat_94_interleave_0 = const()[name = string("concat_94_interleave_0"), val = bool(false)]; tensor concat_94 = concat(axis = concat_94_axis_0, interleave = concat_94_interleave_0, values = (expand_dims_138, expand_dims_139, current_pos, expand_dims_141))[name = string("concat_94")]; tensor concat_95_values1_0 = const()[name = string("concat_95_values1_0"), val = tensor([0])]; tensor concat_95_values3_0 = const()[name = string("concat_95_values3_0"), val = tensor([0])]; int32 concat_95_axis_0 = const()[name = string("concat_95_axis_0"), val = int32(0)]; bool concat_95_interleave_0 = const()[name = string("concat_95_interleave_0"), val = bool(false)]; tensor concat_95 = concat(axis = concat_95_axis_0, interleave = concat_95_interleave_0, values = (expand_dims_142, concat_95_values1_0, var_1725, concat_95_values3_0))[name = string("concat_95")]; tensor model_model_kv_cache_0_internal_tensor_assign_24_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_24_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_24_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_24_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_24_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_24_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_24_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_24_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_24_cast_fp16 = slice_update(begin = concat_94, begin_mask = model_model_kv_cache_0_internal_tensor_assign_24_begin_mask_0, end = concat_95, end_mask = model_model_kv_cache_0_internal_tensor_assign_24_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_24_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_24_stride_0, update = var_6773, x = coreml_update_state_78)[name = string("model_model_kv_cache_0_internal_tensor_assign_24_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_24_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_23_write_state")]; tensor coreml_update_state_79 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_23")]; tensor var_6923_begin_0 = const()[name = string("op_6923_begin_0"), val = tensor([11, 0, 0, 0])]; tensor var_6923_end_0 = const()[name = string("op_6923_end_0"), val = tensor([12, 8, 1024, 128])]; tensor var_6923_end_mask_0 = const()[name = string("op_6923_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_6923_cast_fp16 = slice_by_index(begin = var_6923_begin_0, end = var_6923_end_0, end_mask = var_6923_end_mask_0, x = coreml_update_state_79)[name = string("op_6923_cast_fp16")]; tensor K_layer_cache_23_axes_0 = const()[name = string("K_layer_cache_23_axes_0"), val = tensor([0])]; tensor K_layer_cache_23_cast_fp16 = squeeze(axes = K_layer_cache_23_axes_0, x = var_6923_cast_fp16)[name = string("K_layer_cache_23_cast_fp16")]; tensor var_6930_begin_0 = const()[name = string("op_6930_begin_0"), val = tensor([39, 0, 0, 0])]; tensor var_6930_end_0 = const()[name = string("op_6930_end_0"), val = tensor([40, 8, 1024, 128])]; tensor var_6930_end_mask_0 = const()[name = string("op_6930_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_6930_cast_fp16 = slice_by_index(begin = var_6930_begin_0, end = var_6930_end_0, end_mask = var_6930_end_mask_0, x = coreml_update_state_79)[name = string("op_6930_cast_fp16")]; tensor V_layer_cache_23_axes_0 = const()[name = string("V_layer_cache_23_axes_0"), val = tensor([0])]; tensor V_layer_cache_23_cast_fp16 = squeeze(axes = V_layer_cache_23_axes_0, x = var_6930_cast_fp16)[name = string("V_layer_cache_23_cast_fp16")]; tensor x_227_axes_0 = const()[name = string("x_227_axes_0"), val = tensor([1])]; tensor x_227_cast_fp16 = expand_dims(axes = x_227_axes_0, x = K_layer_cache_23_cast_fp16)[name = string("x_227_cast_fp16")]; tensor var_6967 = const()[name = string("op_6967"), val = tensor([1, 2, 1, 1])]; tensor x_229_cast_fp16 = tile(reps = var_6967, x = x_227_cast_fp16)[name = string("x_229_cast_fp16")]; tensor var_6979 = const()[name = string("op_6979"), val = tensor([1, -1, 1024, 128])]; tensor key_states_47_cast_fp16 = reshape(shape = var_6979, x = x_229_cast_fp16)[name = string("key_states_47_cast_fp16")]; tensor x_233_axes_0 = const()[name = string("x_233_axes_0"), val = tensor([1])]; tensor x_233_cast_fp16 = expand_dims(axes = x_233_axes_0, x = V_layer_cache_23_cast_fp16)[name = string("x_233_cast_fp16")]; tensor var_6987 = const()[name = string("op_6987"), val = tensor([1, 2, 1, 1])]; tensor x_235_cast_fp16 = tile(reps = var_6987, x = x_233_cast_fp16)[name = string("x_235_cast_fp16")]; tensor var_6999 = const()[name = string("op_6999"), val = tensor([1, -1, 1024, 128])]; tensor value_states_69_cast_fp16 = reshape(shape = var_6999, x = x_235_cast_fp16)[name = string("value_states_69_cast_fp16")]; bool var_7014_transpose_x_1 = const()[name = string("op_7014_transpose_x_1"), val = bool(false)]; bool var_7014_transpose_y_1 = const()[name = string("op_7014_transpose_y_1"), val = bool(true)]; tensor var_7014 = matmul(transpose_x = var_7014_transpose_x_1, transpose_y = var_7014_transpose_y_1, x = query_states_45, y = key_states_47_cast_fp16)[name = string("op_7014")]; fp16 var_7015_to_fp16 = const()[name = string("op_7015_to_fp16"), val = fp16(0x1.6ap-4)]; tensor attn_weights_67_cast_fp16 = mul(x = var_7014, y = var_7015_to_fp16)[name = string("attn_weights_67_cast_fp16")]; tensor attn_weights_69_cast_fp16 = add(x = attn_weights_67_cast_fp16, y = causal_mask)[name = string("attn_weights_69_cast_fp16")]; int32 var_7050 = const()[name = string("op_7050"), val = int32(-1)]; tensor attn_weights_71_cast_fp16 = softmax(axis = var_7050, x = attn_weights_69_cast_fp16)[name = string("attn_weights_71_cast_fp16")]; bool attn_output_111_transpose_x_0 = const()[name = string("attn_output_111_transpose_x_0"), val = bool(false)]; bool attn_output_111_transpose_y_0 = const()[name = string("attn_output_111_transpose_y_0"), val = bool(false)]; tensor attn_output_111_cast_fp16 = matmul(transpose_x = attn_output_111_transpose_x_0, transpose_y = attn_output_111_transpose_y_0, x = attn_weights_71_cast_fp16, y = value_states_69_cast_fp16)[name = string("attn_output_111_cast_fp16")]; tensor var_7061_perm_0 = const()[name = string("op_7061_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_7065 = const()[name = string("op_7065"), val = tensor([1, 1, 2048])]; tensor var_7061_cast_fp16 = transpose(perm = var_7061_perm_0, x = attn_output_111_cast_fp16)[name = string("transpose_100")]; tensor attn_output_115_cast_fp16 = reshape(shape = var_7065, x = var_7061_cast_fp16)[name = string("attn_output_115_cast_fp16")]; tensor var_7070 = const()[name = string("op_7070"), val = tensor([0, 2, 1])]; string var_7086_pad_type_0 = const()[name = string("op_7086_pad_type_0"), val = string("valid")]; int32 var_7086_groups_0 = const()[name = string("op_7086_groups_0"), val = int32(1)]; tensor var_7086_strides_0 = const()[name = string("op_7086_strides_0"), val = tensor([1])]; tensor var_7086_pad_0 = const()[name = string("op_7086_pad_0"), val = tensor([0, 0])]; tensor var_7086_dilations_0 = const()[name = string("op_7086_dilations_0"), val = tensor([1])]; tensor squeeze_11_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1045786112))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1048931904))))[name = string("squeeze_11_cast_fp16_to_fp32_to_fp16_palettized")]; tensor var_7071_cast_fp16 = transpose(perm = var_7070, x = attn_output_115_cast_fp16)[name = string("transpose_99")]; tensor var_7086_cast_fp16 = conv(dilations = var_7086_dilations_0, groups = var_7086_groups_0, pad = var_7086_pad_0, pad_type = var_7086_pad_type_0, strides = var_7086_strides_0, weight = squeeze_11_cast_fp16_to_fp32_to_fp16_palettized, x = var_7071_cast_fp16)[name = string("op_7086_cast_fp16")]; tensor var_7090 = const()[name = string("op_7090"), val = tensor([0, 2, 1])]; tensor attn_output_119_cast_fp16 = transpose(perm = var_7090, x = var_7086_cast_fp16)[name = string("transpose_98")]; tensor hidden_states_71_cast_fp16 = add(x = hidden_states_67_cast_fp16, y = attn_output_119_cast_fp16)[name = string("hidden_states_71_cast_fp16")]; tensor mean_95_axes_0 = const()[name = string("mean_95_axes_0"), val = tensor([-1])]; bool mean_95_keep_dims_0 = const()[name = string("mean_95_keep_dims_0"), val = bool(true)]; tensor mean_95_cast_fp16 = reduce_mean(axes = mean_95_axes_0, keep_dims = mean_95_keep_dims_0, x = hidden_states_71_cast_fp16)[name = string("mean_95_cast_fp16")]; tensor input_209_cast_fp16 = sub(x = hidden_states_71_cast_fp16, y = mean_95_cast_fp16)[name = string("input_209_cast_fp16")]; tensor var_7109_axes_0 = const()[name = string("op_7109_axes_0"), val = tensor([-1])]; tensor model_model_layers_11_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_11_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1048964736)))]; fp16 var_7097_to_fp16 = const()[name = string("op_7097_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_7109_cast_fp16 = layer_norm(axes = var_7109_axes_0, epsilon = var_7097_to_fp16, gamma = model_model_layers_11_post_attention_layernorm_weight_to_fp16, x = input_209_cast_fp16)[name = string("op_7109_cast_fp16")]; tensor var_7123 = const()[name = string("op_7123"), val = tensor([0, 2, 1])]; tensor input_211_axes_0 = const()[name = string("input_211_axes_0"), val = tensor([2])]; tensor var_7124 = transpose(perm = var_7123, x = var_7109_cast_fp16)[name = string("transpose_97")]; tensor input_211 = expand_dims(axes = input_211_axes_0, x = var_7124)[name = string("input_211")]; string input_213_pad_type_0 = const()[name = string("input_213_pad_type_0"), val = string("valid")]; tensor input_213_strides_0 = const()[name = string("input_213_strides_0"), val = tensor([1, 1])]; tensor input_213_pad_0 = const()[name = string("input_213_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_213_dilations_0 = const()[name = string("input_213_dilations_0"), val = tensor([1, 1])]; int32 input_213_groups_0 = const()[name = string("input_213_groups_0"), val = int32(1)]; tensor input_213 = conv(dilations = input_213_dilations_0, groups = input_213_groups_0, pad = input_213_pad_0, pad_type = input_213_pad_type_0, strides = input_213_strides_0, weight = model_model_layers_11_mlp_gate_proj_weight_palettized, x = input_211)[name = string("input_213")]; string b_23_pad_type_0 = const()[name = string("b_23_pad_type_0"), val = string("valid")]; tensor b_23_strides_0 = const()[name = string("b_23_strides_0"), val = tensor([1, 1])]; tensor b_23_pad_0 = const()[name = string("b_23_pad_0"), val = tensor([0, 0, 0, 0])]; tensor b_23_dilations_0 = const()[name = string("b_23_dilations_0"), val = tensor([1, 1])]; int32 b_23_groups_0 = const()[name = string("b_23_groups_0"), val = int32(1)]; tensor b_23 = conv(dilations = b_23_dilations_0, groups = b_23_groups_0, pad = b_23_pad_0, pad_type = b_23_pad_type_0, strides = b_23_strides_0, weight = model_model_layers_11_mlp_up_proj_weight_palettized, x = input_211)[name = string("b_23")]; tensor c_23 = silu(x = input_213)[name = string("c_23")]; tensor input_215 = mul(x = c_23, y = b_23)[name = string("input_215")]; string e_23_pad_type_0 = const()[name = string("e_23_pad_type_0"), val = string("valid")]; tensor e_23_strides_0 = const()[name = string("e_23_strides_0"), val = tensor([1, 1])]; tensor e_23_pad_0 = const()[name = string("e_23_pad_0"), val = tensor([0, 0, 0, 0])]; tensor e_23_dilations_0 = const()[name = string("e_23_dilations_0"), val = tensor([1, 1])]; int32 e_23_groups_0 = const()[name = string("e_23_groups_0"), val = int32(1)]; tensor e_23 = conv(dilations = e_23_dilations_0, groups = e_23_groups_0, pad = e_23_pad_0, pad_type = e_23_pad_type_0, strides = e_23_strides_0, weight = model_model_layers_11_mlp_down_proj_weight_palettized, x = input_215)[name = string("e_23")]; tensor var_7146_axes_0 = const()[name = string("op_7146_axes_0"), val = tensor([2])]; tensor var_7146 = squeeze(axes = var_7146_axes_0, x = e_23)[name = string("op_7146")]; tensor var_7147 = const()[name = string("op_7147"), val = tensor([0, 2, 1])]; tensor var_7148 = transpose(perm = var_7147, x = var_7146)[name = string("transpose_96")]; tensor hidden_states_73_cast_fp16 = add(x = hidden_states_71_cast_fp16, y = var_7148)[name = string("hidden_states_73_cast_fp16")]; tensor mean_97_axes_0 = const()[name = string("mean_97_axes_0"), val = tensor([-1])]; bool mean_97_keep_dims_0 = const()[name = string("mean_97_keep_dims_0"), val = bool(true)]; tensor mean_97_cast_fp16 = reduce_mean(axes = mean_97_axes_0, keep_dims = mean_97_keep_dims_0, x = hidden_states_73_cast_fp16)[name = string("mean_97_cast_fp16")]; tensor input_217_cast_fp16 = sub(x = hidden_states_73_cast_fp16, y = mean_97_cast_fp16)[name = string("input_217_cast_fp16")]; tensor var_7166_axes_0 = const()[name = string("op_7166_axes_0"), val = tensor([-1])]; tensor model_model_layers_12_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_12_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1048968896)))]; fp16 var_7154_to_fp16 = const()[name = string("op_7154_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_7166_cast_fp16 = layer_norm(axes = var_7166_axes_0, epsilon = var_7154_to_fp16, gamma = model_model_layers_12_input_layernorm_weight_to_fp16, x = input_217_cast_fp16)[name = string("op_7166_cast_fp16")]; tensor var_7172 = const()[name = string("op_7172"), val = tensor([0, 2, 1])]; tensor var_7175_axes_0 = const()[name = string("op_7175_axes_0"), val = tensor([2])]; tensor var_7173 = transpose(perm = var_7172, x = var_7166_cast_fp16)[name = string("transpose_95")]; tensor var_7175 = expand_dims(axes = var_7175_axes_0, x = var_7173)[name = string("op_7175")]; string var_7191_pad_type_0 = const()[name = string("op_7191_pad_type_0"), val = string("valid")]; tensor var_7191_strides_0 = const()[name = string("op_7191_strides_0"), val = tensor([1, 1])]; tensor var_7191_pad_0 = const()[name = string("op_7191_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_7191_dilations_0 = const()[name = string("op_7191_dilations_0"), val = tensor([1, 1])]; int32 var_7191_groups_0 = const()[name = string("op_7191_groups_0"), val = int32(1)]; tensor var_7191 = conv(dilations = var_7191_dilations_0, groups = var_7191_groups_0, pad = var_7191_pad_0, pad_type = var_7191_pad_type_0, strides = var_7191_strides_0, weight = model_model_layers_12_self_attn_q_proj_weight_palettized, x = var_7175)[name = string("op_7191")]; tensor var_7196 = const()[name = string("op_7196"), val = tensor([1, 16, 1, 128])]; tensor var_7197 = reshape(shape = var_7196, x = var_7191)[name = string("op_7197")]; string var_7213_pad_type_0 = const()[name = string("op_7213_pad_type_0"), val = string("valid")]; tensor var_7213_strides_0 = const()[name = string("op_7213_strides_0"), val = tensor([1, 1])]; tensor var_7213_pad_0 = const()[name = string("op_7213_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_7213_dilations_0 = const()[name = string("op_7213_dilations_0"), val = tensor([1, 1])]; int32 var_7213_groups_0 = const()[name = string("op_7213_groups_0"), val = int32(1)]; tensor var_7213 = conv(dilations = var_7213_dilations_0, groups = var_7213_groups_0, pad = var_7213_pad_0, pad_type = var_7213_pad_type_0, strides = var_7213_strides_0, weight = model_model_layers_12_self_attn_k_proj_weight_palettized, x = var_7175)[name = string("op_7213")]; tensor var_7218 = const()[name = string("op_7218"), val = tensor([1, 8, 1, 128])]; tensor var_7219 = reshape(shape = var_7218, x = var_7213)[name = string("op_7219")]; string var_7235_pad_type_0 = const()[name = string("op_7235_pad_type_0"), val = string("valid")]; tensor var_7235_strides_0 = const()[name = string("op_7235_strides_0"), val = tensor([1, 1])]; tensor var_7235_pad_0 = const()[name = string("op_7235_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_7235_dilations_0 = const()[name = string("op_7235_dilations_0"), val = tensor([1, 1])]; int32 var_7235_groups_0 = const()[name = string("op_7235_groups_0"), val = int32(1)]; tensor var_7235 = conv(dilations = var_7235_dilations_0, groups = var_7235_groups_0, pad = var_7235_pad_0, pad_type = var_7235_pad_type_0, strides = var_7235_strides_0, weight = model_model_layers_12_self_attn_v_proj_weight_palettized, x = var_7175)[name = string("op_7235")]; tensor var_7240 = const()[name = string("op_7240"), val = tensor([1, 8, 1, 128])]; tensor var_7241 = reshape(shape = var_7240, x = var_7235)[name = string("op_7241")]; tensor mean_99_axes_0 = const()[name = string("mean_99_axes_0"), val = tensor([-1])]; bool mean_99_keep_dims_0 = const()[name = string("mean_99_keep_dims_0"), val = bool(true)]; tensor mean_99 = reduce_mean(axes = mean_99_axes_0, keep_dims = mean_99_keep_dims_0, x = var_7197)[name = string("mean_99")]; tensor input_221 = sub(x = var_7197, y = mean_99)[name = string("input_221")]; tensor var_7262_axes_0 = const()[name = string("op_7262_axes_0"), val = tensor([-1])]; tensor model_model_layers_12_self_attn_q_norm_weight_to_fp16 = const()[name = string("model_model_layers_12_self_attn_q_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1048973056)))]; fp16 var_7250_to_fp16 = const()[name = string("op_7250_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_7262_cast_fp16 = layer_norm(axes = var_7262_axes_0, epsilon = var_7250_to_fp16, gamma = model_model_layers_12_self_attn_q_norm_weight_to_fp16, x = input_221)[name = string("op_7262_cast_fp16")]; tensor mean_101_axes_0 = const()[name = string("mean_101_axes_0"), val = tensor([-1])]; bool mean_101_keep_dims_0 = const()[name = string("mean_101_keep_dims_0"), val = bool(true)]; tensor mean_101 = reduce_mean(axes = mean_101_axes_0, keep_dims = mean_101_keep_dims_0, x = var_7219)[name = string("mean_101")]; tensor input_223 = sub(x = var_7219, y = mean_101)[name = string("input_223")]; tensor var_7280_axes_0 = const()[name = string("op_7280_axes_0"), val = tensor([-1])]; tensor model_model_layers_12_self_attn_k_norm_weight_to_fp16 = const()[name = string("model_model_layers_12_self_attn_k_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1048973376)))]; fp16 var_7268_to_fp16 = const()[name = string("op_7268_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_7280_cast_fp16 = layer_norm(axes = var_7280_axes_0, epsilon = var_7268_to_fp16, gamma = model_model_layers_12_self_attn_k_norm_weight_to_fp16, x = input_223)[name = string("op_7280_cast_fp16")]; tensor var_7283 = mul(x = var_7262_cast_fp16, y = cos_1_cast_fp16)[name = string("op_7283")]; tensor x1_49_begin_0 = const()[name = string("x1_49_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_49_end_0 = const()[name = string("x1_49_end_0"), val = tensor([1, 16, 1, 64])]; tensor x1_49_end_mask_0 = const()[name = string("x1_49_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_49 = slice_by_index(begin = x1_49_begin_0, end = x1_49_end_0, end_mask = x1_49_end_mask_0, x = var_7262_cast_fp16)[name = string("x1_49")]; tensor x2_49_begin_0 = const()[name = string("x2_49_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_49_end_0 = const()[name = string("x2_49_end_0"), val = tensor([1, 16, 1, 128])]; tensor x2_49_end_mask_0 = const()[name = string("x2_49_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_49 = slice_by_index(begin = x2_49_begin_0, end = x2_49_end_0, end_mask = x2_49_end_mask_0, x = var_7262_cast_fp16)[name = string("x2_49")]; fp16 const_221_promoted = const()[name = string("const_221_promoted"), val = fp16(-0x1p+0)]; tensor var_7304 = mul(x = x2_49, y = const_221_promoted)[name = string("op_7304")]; int32 var_7306 = const()[name = string("op_7306"), val = int32(-1)]; bool var_7307_interleave_0 = const()[name = string("op_7307_interleave_0"), val = bool(false)]; tensor var_7307 = concat(axis = var_7306, interleave = var_7307_interleave_0, values = (var_7304, x1_49))[name = string("op_7307")]; tensor var_7308 = mul(x = var_7307, y = sin_1_cast_fp16)[name = string("op_7308")]; tensor query_states_49 = add(x = var_7283, y = var_7308)[name = string("query_states_49")]; tensor var_7311 = mul(x = var_7280_cast_fp16, y = cos_1_cast_fp16)[name = string("op_7311")]; tensor x1_51_begin_0 = const()[name = string("x1_51_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_51_end_0 = const()[name = string("x1_51_end_0"), val = tensor([1, 8, 1, 64])]; tensor x1_51_end_mask_0 = const()[name = string("x1_51_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_51 = slice_by_index(begin = x1_51_begin_0, end = x1_51_end_0, end_mask = x1_51_end_mask_0, x = var_7280_cast_fp16)[name = string("x1_51")]; tensor x2_51_begin_0 = const()[name = string("x2_51_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_51_end_0 = const()[name = string("x2_51_end_0"), val = tensor([1, 8, 1, 128])]; tensor x2_51_end_mask_0 = const()[name = string("x2_51_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_51 = slice_by_index(begin = x2_51_begin_0, end = x2_51_end_0, end_mask = x2_51_end_mask_0, x = var_7280_cast_fp16)[name = string("x2_51")]; fp16 const_224_promoted = const()[name = string("const_224_promoted"), val = fp16(-0x1p+0)]; tensor var_7332 = mul(x = x2_51, y = const_224_promoted)[name = string("op_7332")]; int32 var_7334 = const()[name = string("op_7334"), val = int32(-1)]; bool var_7335_interleave_0 = const()[name = string("op_7335_interleave_0"), val = bool(false)]; tensor var_7335 = concat(axis = var_7334, interleave = var_7335_interleave_0, values = (var_7332, x1_51))[name = string("op_7335")]; tensor var_7336 = mul(x = var_7335, y = sin_1_cast_fp16)[name = string("op_7336")]; tensor key_states_49 = add(x = var_7311, y = var_7336)[name = string("key_states_49")]; tensor expand_dims_144 = const()[name = string("expand_dims_144"), val = tensor([12])]; tensor expand_dims_145 = const()[name = string("expand_dims_145"), val = tensor([0])]; tensor expand_dims_147 = const()[name = string("expand_dims_147"), val = tensor([0])]; tensor expand_dims_148 = const()[name = string("expand_dims_148"), val = tensor([13])]; int32 concat_98_axis_0 = const()[name = string("concat_98_axis_0"), val = int32(0)]; bool concat_98_interleave_0 = const()[name = string("concat_98_interleave_0"), val = bool(false)]; tensor concat_98 = concat(axis = concat_98_axis_0, interleave = concat_98_interleave_0, values = (expand_dims_144, expand_dims_145, current_pos, expand_dims_147))[name = string("concat_98")]; tensor concat_99_values1_0 = const()[name = string("concat_99_values1_0"), val = tensor([0])]; tensor concat_99_values3_0 = const()[name = string("concat_99_values3_0"), val = tensor([0])]; int32 concat_99_axis_0 = const()[name = string("concat_99_axis_0"), val = int32(0)]; bool concat_99_interleave_0 = const()[name = string("concat_99_interleave_0"), val = bool(false)]; tensor concat_99 = concat(axis = concat_99_axis_0, interleave = concat_99_interleave_0, values = (expand_dims_148, concat_99_values1_0, var_1725, concat_99_values3_0))[name = string("concat_99")]; tensor model_model_kv_cache_0_internal_tensor_assign_25_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_25_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_25_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_25_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_25_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_25_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_25_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_25_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_25_cast_fp16 = slice_update(begin = concat_98, begin_mask = model_model_kv_cache_0_internal_tensor_assign_25_begin_mask_0, end = concat_99, end_mask = model_model_kv_cache_0_internal_tensor_assign_25_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_25_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_25_stride_0, update = key_states_49, x = coreml_update_state_79)[name = string("model_model_kv_cache_0_internal_tensor_assign_25_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_25_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_24_write_state")]; tensor coreml_update_state_80 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_24")]; tensor expand_dims_150 = const()[name = string("expand_dims_150"), val = tensor([40])]; tensor expand_dims_151 = const()[name = string("expand_dims_151"), val = tensor([0])]; tensor expand_dims_153 = const()[name = string("expand_dims_153"), val = tensor([0])]; tensor expand_dims_154 = const()[name = string("expand_dims_154"), val = tensor([41])]; int32 concat_102_axis_0 = const()[name = string("concat_102_axis_0"), val = int32(0)]; bool concat_102_interleave_0 = const()[name = string("concat_102_interleave_0"), val = bool(false)]; tensor concat_102 = concat(axis = concat_102_axis_0, interleave = concat_102_interleave_0, values = (expand_dims_150, expand_dims_151, current_pos, expand_dims_153))[name = string("concat_102")]; tensor concat_103_values1_0 = const()[name = string("concat_103_values1_0"), val = tensor([0])]; tensor concat_103_values3_0 = const()[name = string("concat_103_values3_0"), val = tensor([0])]; int32 concat_103_axis_0 = const()[name = string("concat_103_axis_0"), val = int32(0)]; bool concat_103_interleave_0 = const()[name = string("concat_103_interleave_0"), val = bool(false)]; tensor concat_103 = concat(axis = concat_103_axis_0, interleave = concat_103_interleave_0, values = (expand_dims_154, concat_103_values1_0, var_1725, concat_103_values3_0))[name = string("concat_103")]; tensor model_model_kv_cache_0_internal_tensor_assign_26_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_26_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_26_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_26_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_26_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_26_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_26_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_26_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_26_cast_fp16 = slice_update(begin = concat_102, begin_mask = model_model_kv_cache_0_internal_tensor_assign_26_begin_mask_0, end = concat_103, end_mask = model_model_kv_cache_0_internal_tensor_assign_26_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_26_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_26_stride_0, update = var_7241, x = coreml_update_state_80)[name = string("model_model_kv_cache_0_internal_tensor_assign_26_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_26_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_25_write_state")]; tensor coreml_update_state_81 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_25")]; tensor var_7391_begin_0 = const()[name = string("op_7391_begin_0"), val = tensor([12, 0, 0, 0])]; tensor var_7391_end_0 = const()[name = string("op_7391_end_0"), val = tensor([13, 8, 1024, 128])]; tensor var_7391_end_mask_0 = const()[name = string("op_7391_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_7391_cast_fp16 = slice_by_index(begin = var_7391_begin_0, end = var_7391_end_0, end_mask = var_7391_end_mask_0, x = coreml_update_state_81)[name = string("op_7391_cast_fp16")]; tensor K_layer_cache_25_axes_0 = const()[name = string("K_layer_cache_25_axes_0"), val = tensor([0])]; tensor K_layer_cache_25_cast_fp16 = squeeze(axes = K_layer_cache_25_axes_0, x = var_7391_cast_fp16)[name = string("K_layer_cache_25_cast_fp16")]; tensor var_7398_begin_0 = const()[name = string("op_7398_begin_0"), val = tensor([40, 0, 0, 0])]; tensor var_7398_end_0 = const()[name = string("op_7398_end_0"), val = tensor([41, 8, 1024, 128])]; tensor var_7398_end_mask_0 = const()[name = string("op_7398_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_7398_cast_fp16 = slice_by_index(begin = var_7398_begin_0, end = var_7398_end_0, end_mask = var_7398_end_mask_0, x = coreml_update_state_81)[name = string("op_7398_cast_fp16")]; tensor V_layer_cache_25_axes_0 = const()[name = string("V_layer_cache_25_axes_0"), val = tensor([0])]; tensor V_layer_cache_25_cast_fp16 = squeeze(axes = V_layer_cache_25_axes_0, x = var_7398_cast_fp16)[name = string("V_layer_cache_25_cast_fp16")]; tensor x_247_axes_0 = const()[name = string("x_247_axes_0"), val = tensor([1])]; tensor x_247_cast_fp16 = expand_dims(axes = x_247_axes_0, x = K_layer_cache_25_cast_fp16)[name = string("x_247_cast_fp16")]; tensor var_7435 = const()[name = string("op_7435"), val = tensor([1, 2, 1, 1])]; tensor x_249_cast_fp16 = tile(reps = var_7435, x = x_247_cast_fp16)[name = string("x_249_cast_fp16")]; tensor var_7447 = const()[name = string("op_7447"), val = tensor([1, -1, 1024, 128])]; tensor key_states_51_cast_fp16 = reshape(shape = var_7447, x = x_249_cast_fp16)[name = string("key_states_51_cast_fp16")]; tensor x_253_axes_0 = const()[name = string("x_253_axes_0"), val = tensor([1])]; tensor x_253_cast_fp16 = expand_dims(axes = x_253_axes_0, x = V_layer_cache_25_cast_fp16)[name = string("x_253_cast_fp16")]; tensor var_7455 = const()[name = string("op_7455"), val = tensor([1, 2, 1, 1])]; tensor x_255_cast_fp16 = tile(reps = var_7455, x = x_253_cast_fp16)[name = string("x_255_cast_fp16")]; tensor var_7467 = const()[name = string("op_7467"), val = tensor([1, -1, 1024, 128])]; tensor value_states_75_cast_fp16 = reshape(shape = var_7467, x = x_255_cast_fp16)[name = string("value_states_75_cast_fp16")]; bool var_7482_transpose_x_1 = const()[name = string("op_7482_transpose_x_1"), val = bool(false)]; bool var_7482_transpose_y_1 = const()[name = string("op_7482_transpose_y_1"), val = bool(true)]; tensor var_7482 = matmul(transpose_x = var_7482_transpose_x_1, transpose_y = var_7482_transpose_y_1, x = query_states_49, y = key_states_51_cast_fp16)[name = string("op_7482")]; fp16 var_7483_to_fp16 = const()[name = string("op_7483_to_fp16"), val = fp16(0x1.6ap-4)]; tensor attn_weights_73_cast_fp16 = mul(x = var_7482, y = var_7483_to_fp16)[name = string("attn_weights_73_cast_fp16")]; tensor attn_weights_75_cast_fp16 = add(x = attn_weights_73_cast_fp16, y = causal_mask)[name = string("attn_weights_75_cast_fp16")]; int32 var_7518 = const()[name = string("op_7518"), val = int32(-1)]; tensor attn_weights_77_cast_fp16 = softmax(axis = var_7518, x = attn_weights_75_cast_fp16)[name = string("attn_weights_77_cast_fp16")]; bool attn_output_121_transpose_x_0 = const()[name = string("attn_output_121_transpose_x_0"), val = bool(false)]; bool attn_output_121_transpose_y_0 = const()[name = string("attn_output_121_transpose_y_0"), val = bool(false)]; tensor attn_output_121_cast_fp16 = matmul(transpose_x = attn_output_121_transpose_x_0, transpose_y = attn_output_121_transpose_y_0, x = attn_weights_77_cast_fp16, y = value_states_75_cast_fp16)[name = string("attn_output_121_cast_fp16")]; tensor var_7529_perm_0 = const()[name = string("op_7529_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_7533 = const()[name = string("op_7533"), val = tensor([1, 1, 2048])]; tensor var_7529_cast_fp16 = transpose(perm = var_7529_perm_0, x = attn_output_121_cast_fp16)[name = string("transpose_94")]; tensor attn_output_125_cast_fp16 = reshape(shape = var_7533, x = var_7529_cast_fp16)[name = string("attn_output_125_cast_fp16")]; tensor var_7538 = const()[name = string("op_7538"), val = tensor([0, 2, 1])]; string var_7554_pad_type_0 = const()[name = string("op_7554_pad_type_0"), val = string("valid")]; int32 var_7554_groups_0 = const()[name = string("op_7554_groups_0"), val = int32(1)]; tensor var_7554_strides_0 = const()[name = string("op_7554_strides_0"), val = tensor([1])]; tensor var_7554_pad_0 = const()[name = string("op_7554_pad_0"), val = tensor([0, 0])]; tensor var_7554_dilations_0 = const()[name = string("op_7554_dilations_0"), val = tensor([1])]; tensor squeeze_12_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1048973696))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1052119488))))[name = string("squeeze_12_cast_fp16_to_fp32_to_fp16_palettized")]; tensor var_7539_cast_fp16 = transpose(perm = var_7538, x = attn_output_125_cast_fp16)[name = string("transpose_93")]; tensor var_7554_cast_fp16 = conv(dilations = var_7554_dilations_0, groups = var_7554_groups_0, pad = var_7554_pad_0, pad_type = var_7554_pad_type_0, strides = var_7554_strides_0, weight = squeeze_12_cast_fp16_to_fp32_to_fp16_palettized, x = var_7539_cast_fp16)[name = string("op_7554_cast_fp16")]; tensor var_7558 = const()[name = string("op_7558"), val = tensor([0, 2, 1])]; tensor attn_output_129_cast_fp16 = transpose(perm = var_7558, x = var_7554_cast_fp16)[name = string("transpose_92")]; tensor hidden_states_77_cast_fp16 = add(x = hidden_states_73_cast_fp16, y = attn_output_129_cast_fp16)[name = string("hidden_states_77_cast_fp16")]; tensor mean_103_axes_0 = const()[name = string("mean_103_axes_0"), val = tensor([-1])]; bool mean_103_keep_dims_0 = const()[name = string("mean_103_keep_dims_0"), val = bool(true)]; tensor mean_103_cast_fp16 = reduce_mean(axes = mean_103_axes_0, keep_dims = mean_103_keep_dims_0, x = hidden_states_77_cast_fp16)[name = string("mean_103_cast_fp16")]; tensor input_227_cast_fp16 = sub(x = hidden_states_77_cast_fp16, y = mean_103_cast_fp16)[name = string("input_227_cast_fp16")]; tensor var_7577_axes_0 = const()[name = string("op_7577_axes_0"), val = tensor([-1])]; tensor model_model_layers_12_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_12_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1052152320)))]; fp16 var_7565_to_fp16 = const()[name = string("op_7565_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_7577_cast_fp16 = layer_norm(axes = var_7577_axes_0, epsilon = var_7565_to_fp16, gamma = model_model_layers_12_post_attention_layernorm_weight_to_fp16, x = input_227_cast_fp16)[name = string("op_7577_cast_fp16")]; tensor var_7591 = const()[name = string("op_7591"), val = tensor([0, 2, 1])]; tensor input_229_axes_0 = const()[name = string("input_229_axes_0"), val = tensor([2])]; tensor var_7592 = transpose(perm = var_7591, x = var_7577_cast_fp16)[name = string("transpose_91")]; tensor input_229 = expand_dims(axes = input_229_axes_0, x = var_7592)[name = string("input_229")]; string input_231_pad_type_0 = const()[name = string("input_231_pad_type_0"), val = string("valid")]; tensor input_231_strides_0 = const()[name = string("input_231_strides_0"), val = tensor([1, 1])]; tensor input_231_pad_0 = const()[name = string("input_231_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_231_dilations_0 = const()[name = string("input_231_dilations_0"), val = tensor([1, 1])]; int32 input_231_groups_0 = const()[name = string("input_231_groups_0"), val = int32(1)]; tensor input_231 = conv(dilations = input_231_dilations_0, groups = input_231_groups_0, pad = input_231_pad_0, pad_type = input_231_pad_type_0, strides = input_231_strides_0, weight = model_model_layers_12_mlp_gate_proj_weight_palettized, x = input_229)[name = string("input_231")]; string b_25_pad_type_0 = const()[name = string("b_25_pad_type_0"), val = string("valid")]; tensor b_25_strides_0 = const()[name = string("b_25_strides_0"), val = tensor([1, 1])]; tensor b_25_pad_0 = const()[name = string("b_25_pad_0"), val = tensor([0, 0, 0, 0])]; tensor b_25_dilations_0 = const()[name = string("b_25_dilations_0"), val = tensor([1, 1])]; int32 b_25_groups_0 = const()[name = string("b_25_groups_0"), val = int32(1)]; tensor b_25 = conv(dilations = b_25_dilations_0, groups = b_25_groups_0, pad = b_25_pad_0, pad_type = b_25_pad_type_0, strides = b_25_strides_0, weight = model_model_layers_12_mlp_up_proj_weight_palettized, x = input_229)[name = string("b_25")]; tensor c_25 = silu(x = input_231)[name = string("c_25")]; tensor input_233 = mul(x = c_25, y = b_25)[name = string("input_233")]; string e_25_pad_type_0 = const()[name = string("e_25_pad_type_0"), val = string("valid")]; tensor e_25_strides_0 = const()[name = string("e_25_strides_0"), val = tensor([1, 1])]; tensor e_25_pad_0 = const()[name = string("e_25_pad_0"), val = tensor([0, 0, 0, 0])]; tensor e_25_dilations_0 = const()[name = string("e_25_dilations_0"), val = tensor([1, 1])]; int32 e_25_groups_0 = const()[name = string("e_25_groups_0"), val = int32(1)]; tensor e_25 = conv(dilations = e_25_dilations_0, groups = e_25_groups_0, pad = e_25_pad_0, pad_type = e_25_pad_type_0, strides = e_25_strides_0, weight = model_model_layers_12_mlp_down_proj_weight_palettized, x = input_233)[name = string("e_25")]; tensor var_7614_axes_0 = const()[name = string("op_7614_axes_0"), val = tensor([2])]; tensor var_7614 = squeeze(axes = var_7614_axes_0, x = e_25)[name = string("op_7614")]; tensor var_7615 = const()[name = string("op_7615"), val = tensor([0, 2, 1])]; tensor var_7616 = transpose(perm = var_7615, x = var_7614)[name = string("transpose_90")]; tensor hidden_states_79_cast_fp16 = add(x = hidden_states_77_cast_fp16, y = var_7616)[name = string("hidden_states_79_cast_fp16")]; tensor mean_105_axes_0 = const()[name = string("mean_105_axes_0"), val = tensor([-1])]; bool mean_105_keep_dims_0 = const()[name = string("mean_105_keep_dims_0"), val = bool(true)]; tensor mean_105_cast_fp16 = reduce_mean(axes = mean_105_axes_0, keep_dims = mean_105_keep_dims_0, x = hidden_states_79_cast_fp16)[name = string("mean_105_cast_fp16")]; tensor input_235_cast_fp16 = sub(x = hidden_states_79_cast_fp16, y = mean_105_cast_fp16)[name = string("input_235_cast_fp16")]; tensor var_7634_axes_0 = const()[name = string("op_7634_axes_0"), val = tensor([-1])]; tensor model_model_layers_13_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_13_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1052156480)))]; fp16 var_7622_to_fp16 = const()[name = string("op_7622_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_7634_cast_fp16 = layer_norm(axes = var_7634_axes_0, epsilon = var_7622_to_fp16, gamma = model_model_layers_13_input_layernorm_weight_to_fp16, x = input_235_cast_fp16)[name = string("op_7634_cast_fp16")]; tensor var_7640 = const()[name = string("op_7640"), val = tensor([0, 2, 1])]; tensor var_7643_axes_0 = const()[name = string("op_7643_axes_0"), val = tensor([2])]; tensor var_7641 = transpose(perm = var_7640, x = var_7634_cast_fp16)[name = string("transpose_89")]; tensor var_7643 = expand_dims(axes = var_7643_axes_0, x = var_7641)[name = string("op_7643")]; string var_7659_pad_type_0 = const()[name = string("op_7659_pad_type_0"), val = string("valid")]; tensor var_7659_strides_0 = const()[name = string("op_7659_strides_0"), val = tensor([1, 1])]; tensor var_7659_pad_0 = const()[name = string("op_7659_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_7659_dilations_0 = const()[name = string("op_7659_dilations_0"), val = tensor([1, 1])]; int32 var_7659_groups_0 = const()[name = string("op_7659_groups_0"), val = int32(1)]; tensor var_7659 = conv(dilations = var_7659_dilations_0, groups = var_7659_groups_0, pad = var_7659_pad_0, pad_type = var_7659_pad_type_0, strides = var_7659_strides_0, weight = model_model_layers_13_self_attn_q_proj_weight_palettized, x = var_7643)[name = string("op_7659")]; tensor var_7664 = const()[name = string("op_7664"), val = tensor([1, 16, 1, 128])]; tensor var_7665 = reshape(shape = var_7664, x = var_7659)[name = string("op_7665")]; string var_7681_pad_type_0 = const()[name = string("op_7681_pad_type_0"), val = string("valid")]; tensor var_7681_strides_0 = const()[name = string("op_7681_strides_0"), val = tensor([1, 1])]; tensor var_7681_pad_0 = const()[name = string("op_7681_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_7681_dilations_0 = const()[name = string("op_7681_dilations_0"), val = tensor([1, 1])]; int32 var_7681_groups_0 = const()[name = string("op_7681_groups_0"), val = int32(1)]; tensor var_7681 = conv(dilations = var_7681_dilations_0, groups = var_7681_groups_0, pad = var_7681_pad_0, pad_type = var_7681_pad_type_0, strides = var_7681_strides_0, weight = model_model_layers_13_self_attn_k_proj_weight_palettized, x = var_7643)[name = string("op_7681")]; tensor var_7686 = const()[name = string("op_7686"), val = tensor([1, 8, 1, 128])]; tensor var_7687 = reshape(shape = var_7686, x = var_7681)[name = string("op_7687")]; string var_7703_pad_type_0 = const()[name = string("op_7703_pad_type_0"), val = string("valid")]; tensor var_7703_strides_0 = const()[name = string("op_7703_strides_0"), val = tensor([1, 1])]; tensor var_7703_pad_0 = const()[name = string("op_7703_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_7703_dilations_0 = const()[name = string("op_7703_dilations_0"), val = tensor([1, 1])]; int32 var_7703_groups_0 = const()[name = string("op_7703_groups_0"), val = int32(1)]; tensor var_7703 = conv(dilations = var_7703_dilations_0, groups = var_7703_groups_0, pad = var_7703_pad_0, pad_type = var_7703_pad_type_0, strides = var_7703_strides_0, weight = model_model_layers_13_self_attn_v_proj_weight_palettized, x = var_7643)[name = string("op_7703")]; tensor var_7708 = const()[name = string("op_7708"), val = tensor([1, 8, 1, 128])]; tensor var_7709 = reshape(shape = var_7708, x = var_7703)[name = string("op_7709")]; tensor mean_107_axes_0 = const()[name = string("mean_107_axes_0"), val = tensor([-1])]; bool mean_107_keep_dims_0 = const()[name = string("mean_107_keep_dims_0"), val = bool(true)]; tensor mean_107 = reduce_mean(axes = mean_107_axes_0, keep_dims = mean_107_keep_dims_0, x = var_7665)[name = string("mean_107")]; tensor input_239 = sub(x = var_7665, y = mean_107)[name = string("input_239")]; tensor var_7730_axes_0 = const()[name = string("op_7730_axes_0"), val = tensor([-1])]; tensor model_model_layers_13_self_attn_q_norm_weight_to_fp16 = const()[name = string("model_model_layers_13_self_attn_q_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1052160640)))]; fp16 var_7718_to_fp16 = const()[name = string("op_7718_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_7730_cast_fp16 = layer_norm(axes = var_7730_axes_0, epsilon = var_7718_to_fp16, gamma = model_model_layers_13_self_attn_q_norm_weight_to_fp16, x = input_239)[name = string("op_7730_cast_fp16")]; tensor mean_109_axes_0 = const()[name = string("mean_109_axes_0"), val = tensor([-1])]; bool mean_109_keep_dims_0 = const()[name = string("mean_109_keep_dims_0"), val = bool(true)]; tensor mean_109 = reduce_mean(axes = mean_109_axes_0, keep_dims = mean_109_keep_dims_0, x = var_7687)[name = string("mean_109")]; tensor input_241 = sub(x = var_7687, y = mean_109)[name = string("input_241")]; tensor var_7748_axes_0 = const()[name = string("op_7748_axes_0"), val = tensor([-1])]; tensor model_model_layers_13_self_attn_k_norm_weight_to_fp16 = const()[name = string("model_model_layers_13_self_attn_k_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1052160960)))]; fp16 var_7736_to_fp16 = const()[name = string("op_7736_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_7748_cast_fp16 = layer_norm(axes = var_7748_axes_0, epsilon = var_7736_to_fp16, gamma = model_model_layers_13_self_attn_k_norm_weight_to_fp16, x = input_241)[name = string("op_7748_cast_fp16")]; tensor var_7751 = mul(x = var_7730_cast_fp16, y = cos_1_cast_fp16)[name = string("op_7751")]; tensor x1_53_begin_0 = const()[name = string("x1_53_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_53_end_0 = const()[name = string("x1_53_end_0"), val = tensor([1, 16, 1, 64])]; tensor x1_53_end_mask_0 = const()[name = string("x1_53_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_53 = slice_by_index(begin = x1_53_begin_0, end = x1_53_end_0, end_mask = x1_53_end_mask_0, x = var_7730_cast_fp16)[name = string("x1_53")]; tensor x2_53_begin_0 = const()[name = string("x2_53_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_53_end_0 = const()[name = string("x2_53_end_0"), val = tensor([1, 16, 1, 128])]; tensor x2_53_end_mask_0 = const()[name = string("x2_53_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_53 = slice_by_index(begin = x2_53_begin_0, end = x2_53_end_0, end_mask = x2_53_end_mask_0, x = var_7730_cast_fp16)[name = string("x2_53")]; fp16 const_239_promoted = const()[name = string("const_239_promoted"), val = fp16(-0x1p+0)]; tensor var_7772 = mul(x = x2_53, y = const_239_promoted)[name = string("op_7772")]; int32 var_7774 = const()[name = string("op_7774"), val = int32(-1)]; bool var_7775_interleave_0 = const()[name = string("op_7775_interleave_0"), val = bool(false)]; tensor var_7775 = concat(axis = var_7774, interleave = var_7775_interleave_0, values = (var_7772, x1_53))[name = string("op_7775")]; tensor var_7776 = mul(x = var_7775, y = sin_1_cast_fp16)[name = string("op_7776")]; tensor query_states_53 = add(x = var_7751, y = var_7776)[name = string("query_states_53")]; tensor var_7779 = mul(x = var_7748_cast_fp16, y = cos_1_cast_fp16)[name = string("op_7779")]; tensor x1_55_begin_0 = const()[name = string("x1_55_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_55_end_0 = const()[name = string("x1_55_end_0"), val = tensor([1, 8, 1, 64])]; tensor x1_55_end_mask_0 = const()[name = string("x1_55_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_55 = slice_by_index(begin = x1_55_begin_0, end = x1_55_end_0, end_mask = x1_55_end_mask_0, x = var_7748_cast_fp16)[name = string("x1_55")]; tensor x2_55_begin_0 = const()[name = string("x2_55_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_55_end_0 = const()[name = string("x2_55_end_0"), val = tensor([1, 8, 1, 128])]; tensor x2_55_end_mask_0 = const()[name = string("x2_55_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_55 = slice_by_index(begin = x2_55_begin_0, end = x2_55_end_0, end_mask = x2_55_end_mask_0, x = var_7748_cast_fp16)[name = string("x2_55")]; fp16 const_242_promoted = const()[name = string("const_242_promoted"), val = fp16(-0x1p+0)]; tensor var_7800 = mul(x = x2_55, y = const_242_promoted)[name = string("op_7800")]; int32 var_7802 = const()[name = string("op_7802"), val = int32(-1)]; bool var_7803_interleave_0 = const()[name = string("op_7803_interleave_0"), val = bool(false)]; tensor var_7803 = concat(axis = var_7802, interleave = var_7803_interleave_0, values = (var_7800, x1_55))[name = string("op_7803")]; tensor var_7804 = mul(x = var_7803, y = sin_1_cast_fp16)[name = string("op_7804")]; tensor key_states_53 = add(x = var_7779, y = var_7804)[name = string("key_states_53")]; tensor expand_dims_156 = const()[name = string("expand_dims_156"), val = tensor([13])]; tensor expand_dims_157 = const()[name = string("expand_dims_157"), val = tensor([0])]; tensor expand_dims_159 = const()[name = string("expand_dims_159"), val = tensor([0])]; tensor expand_dims_160 = const()[name = string("expand_dims_160"), val = tensor([14])]; int32 concat_106_axis_0 = const()[name = string("concat_106_axis_0"), val = int32(0)]; bool concat_106_interleave_0 = const()[name = string("concat_106_interleave_0"), val = bool(false)]; tensor concat_106 = concat(axis = concat_106_axis_0, interleave = concat_106_interleave_0, values = (expand_dims_156, expand_dims_157, current_pos, expand_dims_159))[name = string("concat_106")]; tensor concat_107_values1_0 = const()[name = string("concat_107_values1_0"), val = tensor([0])]; tensor concat_107_values3_0 = const()[name = string("concat_107_values3_0"), val = tensor([0])]; int32 concat_107_axis_0 = const()[name = string("concat_107_axis_0"), val = int32(0)]; bool concat_107_interleave_0 = const()[name = string("concat_107_interleave_0"), val = bool(false)]; tensor concat_107 = concat(axis = concat_107_axis_0, interleave = concat_107_interleave_0, values = (expand_dims_160, concat_107_values1_0, var_1725, concat_107_values3_0))[name = string("concat_107")]; tensor model_model_kv_cache_0_internal_tensor_assign_27_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_27_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_27_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_27_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_27_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_27_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_27_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_27_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_27_cast_fp16 = slice_update(begin = concat_106, begin_mask = model_model_kv_cache_0_internal_tensor_assign_27_begin_mask_0, end = concat_107, end_mask = model_model_kv_cache_0_internal_tensor_assign_27_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_27_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_27_stride_0, update = key_states_53, x = coreml_update_state_81)[name = string("model_model_kv_cache_0_internal_tensor_assign_27_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_27_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_26_write_state")]; tensor coreml_update_state_82 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_26")]; tensor expand_dims_162 = const()[name = string("expand_dims_162"), val = tensor([41])]; tensor expand_dims_163 = const()[name = string("expand_dims_163"), val = tensor([0])]; tensor expand_dims_165 = const()[name = string("expand_dims_165"), val = tensor([0])]; tensor expand_dims_166 = const()[name = string("expand_dims_166"), val = tensor([42])]; int32 concat_110_axis_0 = const()[name = string("concat_110_axis_0"), val = int32(0)]; bool concat_110_interleave_0 = const()[name = string("concat_110_interleave_0"), val = bool(false)]; tensor concat_110 = concat(axis = concat_110_axis_0, interleave = concat_110_interleave_0, values = (expand_dims_162, expand_dims_163, current_pos, expand_dims_165))[name = string("concat_110")]; tensor concat_111_values1_0 = const()[name = string("concat_111_values1_0"), val = tensor([0])]; tensor concat_111_values3_0 = const()[name = string("concat_111_values3_0"), val = tensor([0])]; int32 concat_111_axis_0 = const()[name = string("concat_111_axis_0"), val = int32(0)]; bool concat_111_interleave_0 = const()[name = string("concat_111_interleave_0"), val = bool(false)]; tensor concat_111 = concat(axis = concat_111_axis_0, interleave = concat_111_interleave_0, values = (expand_dims_166, concat_111_values1_0, var_1725, concat_111_values3_0))[name = string("concat_111")]; tensor model_model_kv_cache_0_internal_tensor_assign_28_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_28_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_28_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_28_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_28_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_28_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_28_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_28_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_28_cast_fp16 = slice_update(begin = concat_110, begin_mask = model_model_kv_cache_0_internal_tensor_assign_28_begin_mask_0, end = concat_111, end_mask = model_model_kv_cache_0_internal_tensor_assign_28_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_28_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_28_stride_0, update = var_7709, x = coreml_update_state_82)[name = string("model_model_kv_cache_0_internal_tensor_assign_28_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_28_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_27_write_state")]; tensor coreml_update_state_83 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_27")]; tensor var_7859_begin_0 = const()[name = string("op_7859_begin_0"), val = tensor([13, 0, 0, 0])]; tensor var_7859_end_0 = const()[name = string("op_7859_end_0"), val = tensor([14, 8, 1024, 128])]; tensor var_7859_end_mask_0 = const()[name = string("op_7859_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_7859_cast_fp16 = slice_by_index(begin = var_7859_begin_0, end = var_7859_end_0, end_mask = var_7859_end_mask_0, x = coreml_update_state_83)[name = string("op_7859_cast_fp16")]; tensor K_layer_cache_27_axes_0 = const()[name = string("K_layer_cache_27_axes_0"), val = tensor([0])]; tensor K_layer_cache_27_cast_fp16 = squeeze(axes = K_layer_cache_27_axes_0, x = var_7859_cast_fp16)[name = string("K_layer_cache_27_cast_fp16")]; tensor var_7866_begin_0 = const()[name = string("op_7866_begin_0"), val = tensor([41, 0, 0, 0])]; tensor var_7866_end_0 = const()[name = string("op_7866_end_0"), val = tensor([42, 8, 1024, 128])]; tensor var_7866_end_mask_0 = const()[name = string("op_7866_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_7866_cast_fp16 = slice_by_index(begin = var_7866_begin_0, end = var_7866_end_0, end_mask = var_7866_end_mask_0, x = coreml_update_state_83)[name = string("op_7866_cast_fp16")]; tensor V_layer_cache_27_axes_0 = const()[name = string("V_layer_cache_27_axes_0"), val = tensor([0])]; tensor V_layer_cache_27_cast_fp16 = squeeze(axes = V_layer_cache_27_axes_0, x = var_7866_cast_fp16)[name = string("V_layer_cache_27_cast_fp16")]; tensor x_267_axes_0 = const()[name = string("x_267_axes_0"), val = tensor([1])]; tensor x_267_cast_fp16 = expand_dims(axes = x_267_axes_0, x = K_layer_cache_27_cast_fp16)[name = string("x_267_cast_fp16")]; tensor var_7903 = const()[name = string("op_7903"), val = tensor([1, 2, 1, 1])]; tensor x_269_cast_fp16 = tile(reps = var_7903, x = x_267_cast_fp16)[name = string("x_269_cast_fp16")]; tensor var_7915 = const()[name = string("op_7915"), val = tensor([1, -1, 1024, 128])]; tensor key_states_55_cast_fp16 = reshape(shape = var_7915, x = x_269_cast_fp16)[name = string("key_states_55_cast_fp16")]; tensor x_273_axes_0 = const()[name = string("x_273_axes_0"), val = tensor([1])]; tensor x_273_cast_fp16 = expand_dims(axes = x_273_axes_0, x = V_layer_cache_27_cast_fp16)[name = string("x_273_cast_fp16")]; tensor var_7923 = const()[name = string("op_7923"), val = tensor([1, 2, 1, 1])]; tensor x_275_cast_fp16 = tile(reps = var_7923, x = x_273_cast_fp16)[name = string("x_275_cast_fp16")]; tensor var_7935 = const()[name = string("op_7935"), val = tensor([1, -1, 1024, 128])]; tensor value_states_81_cast_fp16 = reshape(shape = var_7935, x = x_275_cast_fp16)[name = string("value_states_81_cast_fp16")]; bool var_7950_transpose_x_1 = const()[name = string("op_7950_transpose_x_1"), val = bool(false)]; bool var_7950_transpose_y_1 = const()[name = string("op_7950_transpose_y_1"), val = bool(true)]; tensor var_7950 = matmul(transpose_x = var_7950_transpose_x_1, transpose_y = var_7950_transpose_y_1, x = query_states_53, y = key_states_55_cast_fp16)[name = string("op_7950")]; fp16 var_7951_to_fp16 = const()[name = string("op_7951_to_fp16"), val = fp16(0x1.6ap-4)]; tensor attn_weights_79_cast_fp16 = mul(x = var_7950, y = var_7951_to_fp16)[name = string("attn_weights_79_cast_fp16")]; tensor attn_weights_81_cast_fp16 = add(x = attn_weights_79_cast_fp16, y = causal_mask)[name = string("attn_weights_81_cast_fp16")]; int32 var_7986 = const()[name = string("op_7986"), val = int32(-1)]; tensor attn_weights_83_cast_fp16 = softmax(axis = var_7986, x = attn_weights_81_cast_fp16)[name = string("attn_weights_83_cast_fp16")]; bool attn_output_131_transpose_x_0 = const()[name = string("attn_output_131_transpose_x_0"), val = bool(false)]; bool attn_output_131_transpose_y_0 = const()[name = string("attn_output_131_transpose_y_0"), val = bool(false)]; tensor attn_output_131_cast_fp16 = matmul(transpose_x = attn_output_131_transpose_x_0, transpose_y = attn_output_131_transpose_y_0, x = attn_weights_83_cast_fp16, y = value_states_81_cast_fp16)[name = string("attn_output_131_cast_fp16")]; tensor var_7997_perm_0 = const()[name = string("op_7997_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_8001 = const()[name = string("op_8001"), val = tensor([1, 1, 2048])]; tensor var_7997_cast_fp16 = transpose(perm = var_7997_perm_0, x = attn_output_131_cast_fp16)[name = string("transpose_88")]; tensor attn_output_135_cast_fp16 = reshape(shape = var_8001, x = var_7997_cast_fp16)[name = string("attn_output_135_cast_fp16")]; tensor var_8006 = const()[name = string("op_8006"), val = tensor([0, 2, 1])]; string var_8022_pad_type_0 = const()[name = string("op_8022_pad_type_0"), val = string("valid")]; int32 var_8022_groups_0 = const()[name = string("op_8022_groups_0"), val = int32(1)]; tensor var_8022_strides_0 = const()[name = string("op_8022_strides_0"), val = tensor([1])]; tensor var_8022_pad_0 = const()[name = string("op_8022_pad_0"), val = tensor([0, 0])]; tensor var_8022_dilations_0 = const()[name = string("op_8022_dilations_0"), val = tensor([1])]; tensor squeeze_13_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1052161280))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1055307072))))[name = string("squeeze_13_cast_fp16_to_fp32_to_fp16_palettized")]; tensor var_8007_cast_fp16 = transpose(perm = var_8006, x = attn_output_135_cast_fp16)[name = string("transpose_87")]; tensor var_8022_cast_fp16 = conv(dilations = var_8022_dilations_0, groups = var_8022_groups_0, pad = var_8022_pad_0, pad_type = var_8022_pad_type_0, strides = var_8022_strides_0, weight = squeeze_13_cast_fp16_to_fp32_to_fp16_palettized, x = var_8007_cast_fp16)[name = string("op_8022_cast_fp16")]; tensor var_8026 = const()[name = string("op_8026"), val = tensor([0, 2, 1])]; tensor attn_output_139_cast_fp16 = transpose(perm = var_8026, x = var_8022_cast_fp16)[name = string("transpose_86")]; tensor hidden_states_83_cast_fp16 = add(x = hidden_states_79_cast_fp16, y = attn_output_139_cast_fp16)[name = string("hidden_states_83_cast_fp16")]; tensor mean_111_axes_0 = const()[name = string("mean_111_axes_0"), val = tensor([-1])]; bool mean_111_keep_dims_0 = const()[name = string("mean_111_keep_dims_0"), val = bool(true)]; tensor mean_111_cast_fp16 = reduce_mean(axes = mean_111_axes_0, keep_dims = mean_111_keep_dims_0, x = hidden_states_83_cast_fp16)[name = string("mean_111_cast_fp16")]; tensor input_245_cast_fp16 = sub(x = hidden_states_83_cast_fp16, y = mean_111_cast_fp16)[name = string("input_245_cast_fp16")]; tensor var_8045_axes_0 = const()[name = string("op_8045_axes_0"), val = tensor([-1])]; tensor model_model_layers_13_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_13_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1055339904)))]; fp16 var_8033_to_fp16 = const()[name = string("op_8033_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_8045_cast_fp16 = layer_norm(axes = var_8045_axes_0, epsilon = var_8033_to_fp16, gamma = model_model_layers_13_post_attention_layernorm_weight_to_fp16, x = input_245_cast_fp16)[name = string("op_8045_cast_fp16")]; tensor var_8059 = const()[name = string("op_8059"), val = tensor([0, 2, 1])]; tensor input_247_axes_0 = const()[name = string("input_247_axes_0"), val = tensor([2])]; tensor var_8060 = transpose(perm = var_8059, x = var_8045_cast_fp16)[name = string("transpose_85")]; tensor input_247 = expand_dims(axes = input_247_axes_0, x = var_8060)[name = string("input_247")]; string input_249_pad_type_0 = const()[name = string("input_249_pad_type_0"), val = string("valid")]; tensor input_249_strides_0 = const()[name = string("input_249_strides_0"), val = tensor([1, 1])]; tensor input_249_pad_0 = const()[name = string("input_249_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_249_dilations_0 = const()[name = string("input_249_dilations_0"), val = tensor([1, 1])]; int32 input_249_groups_0 = const()[name = string("input_249_groups_0"), val = int32(1)]; tensor input_249 = conv(dilations = input_249_dilations_0, groups = input_249_groups_0, pad = input_249_pad_0, pad_type = input_249_pad_type_0, strides = input_249_strides_0, weight = model_model_layers_13_mlp_gate_proj_weight_palettized, x = input_247)[name = string("input_249")]; string b_27_pad_type_0 = const()[name = string("b_27_pad_type_0"), val = string("valid")]; tensor b_27_strides_0 = const()[name = string("b_27_strides_0"), val = tensor([1, 1])]; tensor b_27_pad_0 = const()[name = string("b_27_pad_0"), val = tensor([0, 0, 0, 0])]; tensor b_27_dilations_0 = const()[name = string("b_27_dilations_0"), val = tensor([1, 1])]; int32 b_27_groups_0 = const()[name = string("b_27_groups_0"), val = int32(1)]; tensor b_27 = conv(dilations = b_27_dilations_0, groups = b_27_groups_0, pad = b_27_pad_0, pad_type = b_27_pad_type_0, strides = b_27_strides_0, weight = model_model_layers_13_mlp_up_proj_weight_palettized, x = input_247)[name = string("b_27")]; tensor c_27 = silu(x = input_249)[name = string("c_27")]; tensor input_251 = mul(x = c_27, y = b_27)[name = string("input_251")]; string e_27_pad_type_0 = const()[name = string("e_27_pad_type_0"), val = string("valid")]; tensor e_27_strides_0 = const()[name = string("e_27_strides_0"), val = tensor([1, 1])]; tensor e_27_pad_0 = const()[name = string("e_27_pad_0"), val = tensor([0, 0, 0, 0])]; tensor e_27_dilations_0 = const()[name = string("e_27_dilations_0"), val = tensor([1, 1])]; int32 e_27_groups_0 = const()[name = string("e_27_groups_0"), val = int32(1)]; tensor e_27 = conv(dilations = e_27_dilations_0, groups = e_27_groups_0, pad = e_27_pad_0, pad_type = e_27_pad_type_0, strides = e_27_strides_0, weight = model_model_layers_13_mlp_down_proj_weight_palettized, x = input_251)[name = string("e_27")]; tensor var_8082_axes_0 = const()[name = string("op_8082_axes_0"), val = tensor([2])]; tensor var_8082 = squeeze(axes = var_8082_axes_0, x = e_27)[name = string("op_8082")]; tensor var_8083 = const()[name = string("op_8083"), val = tensor([0, 2, 1])]; tensor var_8084 = transpose(perm = var_8083, x = var_8082)[name = string("transpose_84")]; tensor hidden_states_85_cast_fp16 = add(x = hidden_states_83_cast_fp16, y = var_8084)[name = string("hidden_states_85_cast_fp16")]; tensor mean_113_axes_0 = const()[name = string("mean_113_axes_0"), val = tensor([-1])]; bool mean_113_keep_dims_0 = const()[name = string("mean_113_keep_dims_0"), val = bool(true)]; tensor mean_113_cast_fp16 = reduce_mean(axes = mean_113_axes_0, keep_dims = mean_113_keep_dims_0, x = hidden_states_85_cast_fp16)[name = string("mean_113_cast_fp16")]; tensor input_253_cast_fp16 = sub(x = hidden_states_85_cast_fp16, y = mean_113_cast_fp16)[name = string("input_253_cast_fp16")]; tensor var_8102_axes_0 = const()[name = string("op_8102_axes_0"), val = tensor([-1])]; tensor model_model_layers_14_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_14_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1055344064)))]; fp16 var_8090_to_fp16 = const()[name = string("op_8090_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_8102_cast_fp16 = layer_norm(axes = var_8102_axes_0, epsilon = var_8090_to_fp16, gamma = model_model_layers_14_input_layernorm_weight_to_fp16, x = input_253_cast_fp16)[name = string("op_8102_cast_fp16")]; tensor var_8108 = const()[name = string("op_8108"), val = tensor([0, 2, 1])]; tensor var_8111_axes_0 = const()[name = string("op_8111_axes_0"), val = tensor([2])]; tensor var_8109 = transpose(perm = var_8108, x = var_8102_cast_fp16)[name = string("transpose_83")]; tensor var_8111 = expand_dims(axes = var_8111_axes_0, x = var_8109)[name = string("op_8111")]; string var_8127_pad_type_0 = const()[name = string("op_8127_pad_type_0"), val = string("valid")]; tensor var_8127_strides_0 = const()[name = string("op_8127_strides_0"), val = tensor([1, 1])]; tensor var_8127_pad_0 = const()[name = string("op_8127_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_8127_dilations_0 = const()[name = string("op_8127_dilations_0"), val = tensor([1, 1])]; int32 var_8127_groups_0 = const()[name = string("op_8127_groups_0"), val = int32(1)]; tensor var_8127 = conv(dilations = var_8127_dilations_0, groups = var_8127_groups_0, pad = var_8127_pad_0, pad_type = var_8127_pad_type_0, strides = var_8127_strides_0, weight = model_model_layers_14_self_attn_q_proj_weight_palettized, x = var_8111)[name = string("op_8127")]; tensor var_8132 = const()[name = string("op_8132"), val = tensor([1, 16, 1, 128])]; tensor var_8133 = reshape(shape = var_8132, x = var_8127)[name = string("op_8133")]; string var_8149_pad_type_0 = const()[name = string("op_8149_pad_type_0"), val = string("valid")]; tensor var_8149_strides_0 = const()[name = string("op_8149_strides_0"), val = tensor([1, 1])]; tensor var_8149_pad_0 = const()[name = string("op_8149_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_8149_dilations_0 = const()[name = string("op_8149_dilations_0"), val = tensor([1, 1])]; int32 var_8149_groups_0 = const()[name = string("op_8149_groups_0"), val = int32(1)]; tensor var_8149 = conv(dilations = var_8149_dilations_0, groups = var_8149_groups_0, pad = var_8149_pad_0, pad_type = var_8149_pad_type_0, strides = var_8149_strides_0, weight = model_model_layers_14_self_attn_k_proj_weight_palettized, x = var_8111)[name = string("op_8149")]; tensor var_8154 = const()[name = string("op_8154"), val = tensor([1, 8, 1, 128])]; tensor var_8155 = reshape(shape = var_8154, x = var_8149)[name = string("op_8155")]; string var_8171_pad_type_0 = const()[name = string("op_8171_pad_type_0"), val = string("valid")]; tensor var_8171_strides_0 = const()[name = string("op_8171_strides_0"), val = tensor([1, 1])]; tensor var_8171_pad_0 = const()[name = string("op_8171_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_8171_dilations_0 = const()[name = string("op_8171_dilations_0"), val = tensor([1, 1])]; int32 var_8171_groups_0 = const()[name = string("op_8171_groups_0"), val = int32(1)]; tensor var_8171 = conv(dilations = var_8171_dilations_0, groups = var_8171_groups_0, pad = var_8171_pad_0, pad_type = var_8171_pad_type_0, strides = var_8171_strides_0, weight = model_model_layers_14_self_attn_v_proj_weight_palettized, x = var_8111)[name = string("op_8171")]; tensor var_8176 = const()[name = string("op_8176"), val = tensor([1, 8, 1, 128])]; tensor var_8177 = reshape(shape = var_8176, x = var_8171)[name = string("op_8177")]; tensor mean_115_axes_0 = const()[name = string("mean_115_axes_0"), val = tensor([-1])]; bool mean_115_keep_dims_0 = const()[name = string("mean_115_keep_dims_0"), val = bool(true)]; tensor mean_115 = reduce_mean(axes = mean_115_axes_0, keep_dims = mean_115_keep_dims_0, x = var_8133)[name = string("mean_115")]; tensor input_257 = sub(x = var_8133, y = mean_115)[name = string("input_257")]; tensor var_8198_axes_0 = const()[name = string("op_8198_axes_0"), val = tensor([-1])]; tensor model_model_layers_14_self_attn_q_norm_weight_to_fp16 = const()[name = string("model_model_layers_14_self_attn_q_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1055348224)))]; fp16 var_8186_to_fp16 = const()[name = string("op_8186_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_8198_cast_fp16 = layer_norm(axes = var_8198_axes_0, epsilon = var_8186_to_fp16, gamma = model_model_layers_14_self_attn_q_norm_weight_to_fp16, x = input_257)[name = string("op_8198_cast_fp16")]; tensor mean_117_axes_0 = const()[name = string("mean_117_axes_0"), val = tensor([-1])]; bool mean_117_keep_dims_0 = const()[name = string("mean_117_keep_dims_0"), val = bool(true)]; tensor mean_117 = reduce_mean(axes = mean_117_axes_0, keep_dims = mean_117_keep_dims_0, x = var_8155)[name = string("mean_117")]; tensor input_259 = sub(x = var_8155, y = mean_117)[name = string("input_259")]; tensor var_8216_axes_0 = const()[name = string("op_8216_axes_0"), val = tensor([-1])]; tensor model_model_layers_14_self_attn_k_norm_weight_to_fp16 = const()[name = string("model_model_layers_14_self_attn_k_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1055348544)))]; fp16 var_8204_to_fp16 = const()[name = string("op_8204_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_8216_cast_fp16 = layer_norm(axes = var_8216_axes_0, epsilon = var_8204_to_fp16, gamma = model_model_layers_14_self_attn_k_norm_weight_to_fp16, x = input_259)[name = string("op_8216_cast_fp16")]; tensor var_8219 = mul(x = var_8198_cast_fp16, y = cos_1_cast_fp16)[name = string("op_8219")]; tensor x1_57_begin_0 = const()[name = string("x1_57_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_57_end_0 = const()[name = string("x1_57_end_0"), val = tensor([1, 16, 1, 64])]; tensor x1_57_end_mask_0 = const()[name = string("x1_57_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_57 = slice_by_index(begin = x1_57_begin_0, end = x1_57_end_0, end_mask = x1_57_end_mask_0, x = var_8198_cast_fp16)[name = string("x1_57")]; tensor x2_57_begin_0 = const()[name = string("x2_57_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_57_end_0 = const()[name = string("x2_57_end_0"), val = tensor([1, 16, 1, 128])]; tensor x2_57_end_mask_0 = const()[name = string("x2_57_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_57 = slice_by_index(begin = x2_57_begin_0, end = x2_57_end_0, end_mask = x2_57_end_mask_0, x = var_8198_cast_fp16)[name = string("x2_57")]; fp16 const_257_promoted = const()[name = string("const_257_promoted"), val = fp16(-0x1p+0)]; tensor var_8240 = mul(x = x2_57, y = const_257_promoted)[name = string("op_8240")]; int32 var_8242 = const()[name = string("op_8242"), val = int32(-1)]; bool var_8243_interleave_0 = const()[name = string("op_8243_interleave_0"), val = bool(false)]; tensor var_8243 = concat(axis = var_8242, interleave = var_8243_interleave_0, values = (var_8240, x1_57))[name = string("op_8243")]; tensor var_8244 = mul(x = var_8243, y = sin_1_cast_fp16)[name = string("op_8244")]; tensor query_states_57 = add(x = var_8219, y = var_8244)[name = string("query_states_57")]; tensor var_8247 = mul(x = var_8216_cast_fp16, y = cos_1_cast_fp16)[name = string("op_8247")]; tensor x1_59_begin_0 = const()[name = string("x1_59_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_59_end_0 = const()[name = string("x1_59_end_0"), val = tensor([1, 8, 1, 64])]; tensor x1_59_end_mask_0 = const()[name = string("x1_59_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_59 = slice_by_index(begin = x1_59_begin_0, end = x1_59_end_0, end_mask = x1_59_end_mask_0, x = var_8216_cast_fp16)[name = string("x1_59")]; tensor x2_59_begin_0 = const()[name = string("x2_59_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_59_end_0 = const()[name = string("x2_59_end_0"), val = tensor([1, 8, 1, 128])]; tensor x2_59_end_mask_0 = const()[name = string("x2_59_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_59 = slice_by_index(begin = x2_59_begin_0, end = x2_59_end_0, end_mask = x2_59_end_mask_0, x = var_8216_cast_fp16)[name = string("x2_59")]; fp16 const_260_promoted = const()[name = string("const_260_promoted"), val = fp16(-0x1p+0)]; tensor var_8268 = mul(x = x2_59, y = const_260_promoted)[name = string("op_8268")]; int32 var_8270 = const()[name = string("op_8270"), val = int32(-1)]; bool var_8271_interleave_0 = const()[name = string("op_8271_interleave_0"), val = bool(false)]; tensor var_8271 = concat(axis = var_8270, interleave = var_8271_interleave_0, values = (var_8268, x1_59))[name = string("op_8271")]; tensor var_8272 = mul(x = var_8271, y = sin_1_cast_fp16)[name = string("op_8272")]; tensor key_states_57 = add(x = var_8247, y = var_8272)[name = string("key_states_57")]; tensor expand_dims_168 = const()[name = string("expand_dims_168"), val = tensor([14])]; tensor expand_dims_169 = const()[name = string("expand_dims_169"), val = tensor([0])]; tensor expand_dims_171 = const()[name = string("expand_dims_171"), val = tensor([0])]; tensor expand_dims_172 = const()[name = string("expand_dims_172"), val = tensor([15])]; int32 concat_114_axis_0 = const()[name = string("concat_114_axis_0"), val = int32(0)]; bool concat_114_interleave_0 = const()[name = string("concat_114_interleave_0"), val = bool(false)]; tensor concat_114 = concat(axis = concat_114_axis_0, interleave = concat_114_interleave_0, values = (expand_dims_168, expand_dims_169, current_pos, expand_dims_171))[name = string("concat_114")]; tensor concat_115_values1_0 = const()[name = string("concat_115_values1_0"), val = tensor([0])]; tensor concat_115_values3_0 = const()[name = string("concat_115_values3_0"), val = tensor([0])]; int32 concat_115_axis_0 = const()[name = string("concat_115_axis_0"), val = int32(0)]; bool concat_115_interleave_0 = const()[name = string("concat_115_interleave_0"), val = bool(false)]; tensor concat_115 = concat(axis = concat_115_axis_0, interleave = concat_115_interleave_0, values = (expand_dims_172, concat_115_values1_0, var_1725, concat_115_values3_0))[name = string("concat_115")]; tensor model_model_kv_cache_0_internal_tensor_assign_29_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_29_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_29_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_29_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_29_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_29_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_29_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_29_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_29_cast_fp16 = slice_update(begin = concat_114, begin_mask = model_model_kv_cache_0_internal_tensor_assign_29_begin_mask_0, end = concat_115, end_mask = model_model_kv_cache_0_internal_tensor_assign_29_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_29_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_29_stride_0, update = key_states_57, x = coreml_update_state_83)[name = string("model_model_kv_cache_0_internal_tensor_assign_29_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_29_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_28_write_state")]; tensor coreml_update_state_84 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_28")]; tensor expand_dims_174 = const()[name = string("expand_dims_174"), val = tensor([42])]; tensor expand_dims_175 = const()[name = string("expand_dims_175"), val = tensor([0])]; tensor expand_dims_177 = const()[name = string("expand_dims_177"), val = tensor([0])]; tensor expand_dims_178 = const()[name = string("expand_dims_178"), val = tensor([43])]; int32 concat_118_axis_0 = const()[name = string("concat_118_axis_0"), val = int32(0)]; bool concat_118_interleave_0 = const()[name = string("concat_118_interleave_0"), val = bool(false)]; tensor concat_118 = concat(axis = concat_118_axis_0, interleave = concat_118_interleave_0, values = (expand_dims_174, expand_dims_175, current_pos, expand_dims_177))[name = string("concat_118")]; tensor concat_119_values1_0 = const()[name = string("concat_119_values1_0"), val = tensor([0])]; tensor concat_119_values3_0 = const()[name = string("concat_119_values3_0"), val = tensor([0])]; int32 concat_119_axis_0 = const()[name = string("concat_119_axis_0"), val = int32(0)]; bool concat_119_interleave_0 = const()[name = string("concat_119_interleave_0"), val = bool(false)]; tensor concat_119 = concat(axis = concat_119_axis_0, interleave = concat_119_interleave_0, values = (expand_dims_178, concat_119_values1_0, var_1725, concat_119_values3_0))[name = string("concat_119")]; tensor model_model_kv_cache_0_internal_tensor_assign_30_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_30_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_30_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_30_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_30_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_30_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_30_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_30_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_30_cast_fp16 = slice_update(begin = concat_118, begin_mask = model_model_kv_cache_0_internal_tensor_assign_30_begin_mask_0, end = concat_119, end_mask = model_model_kv_cache_0_internal_tensor_assign_30_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_30_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_30_stride_0, update = var_8177, x = coreml_update_state_84)[name = string("model_model_kv_cache_0_internal_tensor_assign_30_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_30_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_29_write_state")]; tensor coreml_update_state_85 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_29")]; tensor var_8327_begin_0 = const()[name = string("op_8327_begin_0"), val = tensor([14, 0, 0, 0])]; tensor var_8327_end_0 = const()[name = string("op_8327_end_0"), val = tensor([15, 8, 1024, 128])]; tensor var_8327_end_mask_0 = const()[name = string("op_8327_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_8327_cast_fp16 = slice_by_index(begin = var_8327_begin_0, end = var_8327_end_0, end_mask = var_8327_end_mask_0, x = coreml_update_state_85)[name = string("op_8327_cast_fp16")]; tensor K_layer_cache_29_axes_0 = const()[name = string("K_layer_cache_29_axes_0"), val = tensor([0])]; tensor K_layer_cache_29_cast_fp16 = squeeze(axes = K_layer_cache_29_axes_0, x = var_8327_cast_fp16)[name = string("K_layer_cache_29_cast_fp16")]; tensor var_8334_begin_0 = const()[name = string("op_8334_begin_0"), val = tensor([42, 0, 0, 0])]; tensor var_8334_end_0 = const()[name = string("op_8334_end_0"), val = tensor([43, 8, 1024, 128])]; tensor var_8334_end_mask_0 = const()[name = string("op_8334_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_8334_cast_fp16 = slice_by_index(begin = var_8334_begin_0, end = var_8334_end_0, end_mask = var_8334_end_mask_0, x = coreml_update_state_85)[name = string("op_8334_cast_fp16")]; tensor V_layer_cache_29_axes_0 = const()[name = string("V_layer_cache_29_axes_0"), val = tensor([0])]; tensor V_layer_cache_29_cast_fp16 = squeeze(axes = V_layer_cache_29_axes_0, x = var_8334_cast_fp16)[name = string("V_layer_cache_29_cast_fp16")]; tensor x_287_axes_0 = const()[name = string("x_287_axes_0"), val = tensor([1])]; tensor x_287_cast_fp16 = expand_dims(axes = x_287_axes_0, x = K_layer_cache_29_cast_fp16)[name = string("x_287_cast_fp16")]; tensor var_8371 = const()[name = string("op_8371"), val = tensor([1, 2, 1, 1])]; tensor x_289_cast_fp16 = tile(reps = var_8371, x = x_287_cast_fp16)[name = string("x_289_cast_fp16")]; tensor var_8383 = const()[name = string("op_8383"), val = tensor([1, -1, 1024, 128])]; tensor key_states_59_cast_fp16 = reshape(shape = var_8383, x = x_289_cast_fp16)[name = string("key_states_59_cast_fp16")]; tensor x_293_axes_0 = const()[name = string("x_293_axes_0"), val = tensor([1])]; tensor x_293_cast_fp16 = expand_dims(axes = x_293_axes_0, x = V_layer_cache_29_cast_fp16)[name = string("x_293_cast_fp16")]; tensor var_8391 = const()[name = string("op_8391"), val = tensor([1, 2, 1, 1])]; tensor x_295_cast_fp16 = tile(reps = var_8391, x = x_293_cast_fp16)[name = string("x_295_cast_fp16")]; tensor var_8403 = const()[name = string("op_8403"), val = tensor([1, -1, 1024, 128])]; tensor value_states_87_cast_fp16 = reshape(shape = var_8403, x = x_295_cast_fp16)[name = string("value_states_87_cast_fp16")]; bool var_8418_transpose_x_1 = const()[name = string("op_8418_transpose_x_1"), val = bool(false)]; bool var_8418_transpose_y_1 = const()[name = string("op_8418_transpose_y_1"), val = bool(true)]; tensor var_8418 = matmul(transpose_x = var_8418_transpose_x_1, transpose_y = var_8418_transpose_y_1, x = query_states_57, y = key_states_59_cast_fp16)[name = string("op_8418")]; fp16 var_8419_to_fp16 = const()[name = string("op_8419_to_fp16"), val = fp16(0x1.6ap-4)]; tensor attn_weights_85_cast_fp16 = mul(x = var_8418, y = var_8419_to_fp16)[name = string("attn_weights_85_cast_fp16")]; tensor attn_weights_87_cast_fp16 = add(x = attn_weights_85_cast_fp16, y = causal_mask)[name = string("attn_weights_87_cast_fp16")]; int32 var_8454 = const()[name = string("op_8454"), val = int32(-1)]; tensor attn_weights_89_cast_fp16 = softmax(axis = var_8454, x = attn_weights_87_cast_fp16)[name = string("attn_weights_89_cast_fp16")]; bool attn_output_141_transpose_x_0 = const()[name = string("attn_output_141_transpose_x_0"), val = bool(false)]; bool attn_output_141_transpose_y_0 = const()[name = string("attn_output_141_transpose_y_0"), val = bool(false)]; tensor attn_output_141_cast_fp16 = matmul(transpose_x = attn_output_141_transpose_x_0, transpose_y = attn_output_141_transpose_y_0, x = attn_weights_89_cast_fp16, y = value_states_87_cast_fp16)[name = string("attn_output_141_cast_fp16")]; tensor var_8465_perm_0 = const()[name = string("op_8465_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_8469 = const()[name = string("op_8469"), val = tensor([1, 1, 2048])]; tensor var_8465_cast_fp16 = transpose(perm = var_8465_perm_0, x = attn_output_141_cast_fp16)[name = string("transpose_82")]; tensor attn_output_145_cast_fp16 = reshape(shape = var_8469, x = var_8465_cast_fp16)[name = string("attn_output_145_cast_fp16")]; tensor var_8474 = const()[name = string("op_8474"), val = tensor([0, 2, 1])]; string var_8490_pad_type_0 = const()[name = string("op_8490_pad_type_0"), val = string("valid")]; int32 var_8490_groups_0 = const()[name = string("op_8490_groups_0"), val = int32(1)]; tensor var_8490_strides_0 = const()[name = string("op_8490_strides_0"), val = tensor([1])]; tensor var_8490_pad_0 = const()[name = string("op_8490_pad_0"), val = tensor([0, 0])]; tensor var_8490_dilations_0 = const()[name = string("op_8490_dilations_0"), val = tensor([1])]; tensor squeeze_14_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1055348864))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1058494656))))[name = string("squeeze_14_cast_fp16_to_fp32_to_fp16_palettized")]; tensor var_8475_cast_fp16 = transpose(perm = var_8474, x = attn_output_145_cast_fp16)[name = string("transpose_81")]; tensor var_8490_cast_fp16 = conv(dilations = var_8490_dilations_0, groups = var_8490_groups_0, pad = var_8490_pad_0, pad_type = var_8490_pad_type_0, strides = var_8490_strides_0, weight = squeeze_14_cast_fp16_to_fp32_to_fp16_palettized, x = var_8475_cast_fp16)[name = string("op_8490_cast_fp16")]; tensor var_8494 = const()[name = string("op_8494"), val = tensor([0, 2, 1])]; tensor attn_output_149_cast_fp16 = transpose(perm = var_8494, x = var_8490_cast_fp16)[name = string("transpose_80")]; tensor hidden_states_89_cast_fp16 = add(x = hidden_states_85_cast_fp16, y = attn_output_149_cast_fp16)[name = string("hidden_states_89_cast_fp16")]; tensor mean_119_axes_0 = const()[name = string("mean_119_axes_0"), val = tensor([-1])]; bool mean_119_keep_dims_0 = const()[name = string("mean_119_keep_dims_0"), val = bool(true)]; tensor mean_119_cast_fp16 = reduce_mean(axes = mean_119_axes_0, keep_dims = mean_119_keep_dims_0, x = hidden_states_89_cast_fp16)[name = string("mean_119_cast_fp16")]; tensor input_263_cast_fp16 = sub(x = hidden_states_89_cast_fp16, y = mean_119_cast_fp16)[name = string("input_263_cast_fp16")]; tensor var_8513_axes_0 = const()[name = string("op_8513_axes_0"), val = tensor([-1])]; tensor model_model_layers_14_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_14_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1058527488)))]; fp16 var_8501_to_fp16 = const()[name = string("op_8501_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_8513_cast_fp16 = layer_norm(axes = var_8513_axes_0, epsilon = var_8501_to_fp16, gamma = model_model_layers_14_post_attention_layernorm_weight_to_fp16, x = input_263_cast_fp16)[name = string("op_8513_cast_fp16")]; tensor var_8527 = const()[name = string("op_8527"), val = tensor([0, 2, 1])]; tensor input_265_axes_0 = const()[name = string("input_265_axes_0"), val = tensor([2])]; tensor var_8528 = transpose(perm = var_8527, x = var_8513_cast_fp16)[name = string("transpose_79")]; tensor input_265 = expand_dims(axes = input_265_axes_0, x = var_8528)[name = string("input_265")]; string input_267_pad_type_0 = const()[name = string("input_267_pad_type_0"), val = string("valid")]; tensor input_267_strides_0 = const()[name = string("input_267_strides_0"), val = tensor([1, 1])]; tensor input_267_pad_0 = const()[name = string("input_267_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_267_dilations_0 = const()[name = string("input_267_dilations_0"), val = tensor([1, 1])]; int32 input_267_groups_0 = const()[name = string("input_267_groups_0"), val = int32(1)]; tensor input_267 = conv(dilations = input_267_dilations_0, groups = input_267_groups_0, pad = input_267_pad_0, pad_type = input_267_pad_type_0, strides = input_267_strides_0, weight = model_model_layers_14_mlp_gate_proj_weight_palettized, x = input_265)[name = string("input_267")]; string b_29_pad_type_0 = const()[name = string("b_29_pad_type_0"), val = string("valid")]; tensor b_29_strides_0 = const()[name = string("b_29_strides_0"), val = tensor([1, 1])]; tensor b_29_pad_0 = const()[name = string("b_29_pad_0"), val = tensor([0, 0, 0, 0])]; tensor b_29_dilations_0 = const()[name = string("b_29_dilations_0"), val = tensor([1, 1])]; int32 b_29_groups_0 = const()[name = string("b_29_groups_0"), val = int32(1)]; tensor b_29 = conv(dilations = b_29_dilations_0, groups = b_29_groups_0, pad = b_29_pad_0, pad_type = b_29_pad_type_0, strides = b_29_strides_0, weight = model_model_layers_14_mlp_up_proj_weight_palettized, x = input_265)[name = string("b_29")]; tensor c_29 = silu(x = input_267)[name = string("c_29")]; tensor input_269 = mul(x = c_29, y = b_29)[name = string("input_269")]; string e_29_pad_type_0 = const()[name = string("e_29_pad_type_0"), val = string("valid")]; tensor e_29_strides_0 = const()[name = string("e_29_strides_0"), val = tensor([1, 1])]; tensor e_29_pad_0 = const()[name = string("e_29_pad_0"), val = tensor([0, 0, 0, 0])]; tensor e_29_dilations_0 = const()[name = string("e_29_dilations_0"), val = tensor([1, 1])]; int32 e_29_groups_0 = const()[name = string("e_29_groups_0"), val = int32(1)]; tensor e_29 = conv(dilations = e_29_dilations_0, groups = e_29_groups_0, pad = e_29_pad_0, pad_type = e_29_pad_type_0, strides = e_29_strides_0, weight = model_model_layers_14_mlp_down_proj_weight_palettized, x = input_269)[name = string("e_29")]; tensor var_8550_axes_0 = const()[name = string("op_8550_axes_0"), val = tensor([2])]; tensor var_8550 = squeeze(axes = var_8550_axes_0, x = e_29)[name = string("op_8550")]; tensor var_8551 = const()[name = string("op_8551"), val = tensor([0, 2, 1])]; tensor var_8552 = transpose(perm = var_8551, x = var_8550)[name = string("transpose_78")]; tensor hidden_states_91_cast_fp16 = add(x = hidden_states_89_cast_fp16, y = var_8552)[name = string("hidden_states_91_cast_fp16")]; tensor mean_121_axes_0 = const()[name = string("mean_121_axes_0"), val = tensor([-1])]; bool mean_121_keep_dims_0 = const()[name = string("mean_121_keep_dims_0"), val = bool(true)]; tensor mean_121_cast_fp16 = reduce_mean(axes = mean_121_axes_0, keep_dims = mean_121_keep_dims_0, x = hidden_states_91_cast_fp16)[name = string("mean_121_cast_fp16")]; tensor input_271_cast_fp16 = sub(x = hidden_states_91_cast_fp16, y = mean_121_cast_fp16)[name = string("input_271_cast_fp16")]; tensor var_8570_axes_0 = const()[name = string("op_8570_axes_0"), val = tensor([-1])]; tensor model_model_layers_15_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_15_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1058531648)))]; fp16 var_8558_to_fp16 = const()[name = string("op_8558_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_8570_cast_fp16 = layer_norm(axes = var_8570_axes_0, epsilon = var_8558_to_fp16, gamma = model_model_layers_15_input_layernorm_weight_to_fp16, x = input_271_cast_fp16)[name = string("op_8570_cast_fp16")]; tensor var_8576 = const()[name = string("op_8576"), val = tensor([0, 2, 1])]; tensor var_8579_axes_0 = const()[name = string("op_8579_axes_0"), val = tensor([2])]; tensor var_8577 = transpose(perm = var_8576, x = var_8570_cast_fp16)[name = string("transpose_77")]; tensor var_8579 = expand_dims(axes = var_8579_axes_0, x = var_8577)[name = string("op_8579")]; string var_8595_pad_type_0 = const()[name = string("op_8595_pad_type_0"), val = string("valid")]; tensor var_8595_strides_0 = const()[name = string("op_8595_strides_0"), val = tensor([1, 1])]; tensor var_8595_pad_0 = const()[name = string("op_8595_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_8595_dilations_0 = const()[name = string("op_8595_dilations_0"), val = tensor([1, 1])]; int32 var_8595_groups_0 = const()[name = string("op_8595_groups_0"), val = int32(1)]; tensor var_8595 = conv(dilations = var_8595_dilations_0, groups = var_8595_groups_0, pad = var_8595_pad_0, pad_type = var_8595_pad_type_0, strides = var_8595_strides_0, weight = model_model_layers_15_self_attn_q_proj_weight_palettized, x = var_8579)[name = string("op_8595")]; tensor var_8600 = const()[name = string("op_8600"), val = tensor([1, 16, 1, 128])]; tensor var_8601 = reshape(shape = var_8600, x = var_8595)[name = string("op_8601")]; string var_8617_pad_type_0 = const()[name = string("op_8617_pad_type_0"), val = string("valid")]; tensor var_8617_strides_0 = const()[name = string("op_8617_strides_0"), val = tensor([1, 1])]; tensor var_8617_pad_0 = const()[name = string("op_8617_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_8617_dilations_0 = const()[name = string("op_8617_dilations_0"), val = tensor([1, 1])]; int32 var_8617_groups_0 = const()[name = string("op_8617_groups_0"), val = int32(1)]; tensor var_8617 = conv(dilations = var_8617_dilations_0, groups = var_8617_groups_0, pad = var_8617_pad_0, pad_type = var_8617_pad_type_0, strides = var_8617_strides_0, weight = model_model_layers_15_self_attn_k_proj_weight_palettized, x = var_8579)[name = string("op_8617")]; tensor var_8622 = const()[name = string("op_8622"), val = tensor([1, 8, 1, 128])]; tensor var_8623 = reshape(shape = var_8622, x = var_8617)[name = string("op_8623")]; string var_8639_pad_type_0 = const()[name = string("op_8639_pad_type_0"), val = string("valid")]; tensor var_8639_strides_0 = const()[name = string("op_8639_strides_0"), val = tensor([1, 1])]; tensor var_8639_pad_0 = const()[name = string("op_8639_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_8639_dilations_0 = const()[name = string("op_8639_dilations_0"), val = tensor([1, 1])]; int32 var_8639_groups_0 = const()[name = string("op_8639_groups_0"), val = int32(1)]; tensor var_8639 = conv(dilations = var_8639_dilations_0, groups = var_8639_groups_0, pad = var_8639_pad_0, pad_type = var_8639_pad_type_0, strides = var_8639_strides_0, weight = model_model_layers_15_self_attn_v_proj_weight_palettized, x = var_8579)[name = string("op_8639")]; tensor var_8644 = const()[name = string("op_8644"), val = tensor([1, 8, 1, 128])]; tensor var_8645 = reshape(shape = var_8644, x = var_8639)[name = string("op_8645")]; tensor mean_123_axes_0 = const()[name = string("mean_123_axes_0"), val = tensor([-1])]; bool mean_123_keep_dims_0 = const()[name = string("mean_123_keep_dims_0"), val = bool(true)]; tensor mean_123 = reduce_mean(axes = mean_123_axes_0, keep_dims = mean_123_keep_dims_0, x = var_8601)[name = string("mean_123")]; tensor input_275 = sub(x = var_8601, y = mean_123)[name = string("input_275")]; tensor var_8666_axes_0 = const()[name = string("op_8666_axes_0"), val = tensor([-1])]; tensor model_model_layers_15_self_attn_q_norm_weight_to_fp16 = const()[name = string("model_model_layers_15_self_attn_q_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1058535808)))]; fp16 var_8654_to_fp16 = const()[name = string("op_8654_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_8666_cast_fp16 = layer_norm(axes = var_8666_axes_0, epsilon = var_8654_to_fp16, gamma = model_model_layers_15_self_attn_q_norm_weight_to_fp16, x = input_275)[name = string("op_8666_cast_fp16")]; tensor mean_125_axes_0 = const()[name = string("mean_125_axes_0"), val = tensor([-1])]; bool mean_125_keep_dims_0 = const()[name = string("mean_125_keep_dims_0"), val = bool(true)]; tensor mean_125 = reduce_mean(axes = mean_125_axes_0, keep_dims = mean_125_keep_dims_0, x = var_8623)[name = string("mean_125")]; tensor input_277 = sub(x = var_8623, y = mean_125)[name = string("input_277")]; tensor var_8684_axes_0 = const()[name = string("op_8684_axes_0"), val = tensor([-1])]; tensor model_model_layers_15_self_attn_k_norm_weight_to_fp16 = const()[name = string("model_model_layers_15_self_attn_k_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1058536128)))]; fp16 var_8672_to_fp16 = const()[name = string("op_8672_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_8684_cast_fp16 = layer_norm(axes = var_8684_axes_0, epsilon = var_8672_to_fp16, gamma = model_model_layers_15_self_attn_k_norm_weight_to_fp16, x = input_277)[name = string("op_8684_cast_fp16")]; tensor var_8687 = mul(x = var_8666_cast_fp16, y = cos_1_cast_fp16)[name = string("op_8687")]; tensor x1_61_begin_0 = const()[name = string("x1_61_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_61_end_0 = const()[name = string("x1_61_end_0"), val = tensor([1, 16, 1, 64])]; tensor x1_61_end_mask_0 = const()[name = string("x1_61_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_61 = slice_by_index(begin = x1_61_begin_0, end = x1_61_end_0, end_mask = x1_61_end_mask_0, x = var_8666_cast_fp16)[name = string("x1_61")]; tensor x2_61_begin_0 = const()[name = string("x2_61_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_61_end_0 = const()[name = string("x2_61_end_0"), val = tensor([1, 16, 1, 128])]; tensor x2_61_end_mask_0 = const()[name = string("x2_61_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_61 = slice_by_index(begin = x2_61_begin_0, end = x2_61_end_0, end_mask = x2_61_end_mask_0, x = var_8666_cast_fp16)[name = string("x2_61")]; fp16 const_275_promoted = const()[name = string("const_275_promoted"), val = fp16(-0x1p+0)]; tensor var_8708 = mul(x = x2_61, y = const_275_promoted)[name = string("op_8708")]; int32 var_8710 = const()[name = string("op_8710"), val = int32(-1)]; bool var_8711_interleave_0 = const()[name = string("op_8711_interleave_0"), val = bool(false)]; tensor var_8711 = concat(axis = var_8710, interleave = var_8711_interleave_0, values = (var_8708, x1_61))[name = string("op_8711")]; tensor var_8712 = mul(x = var_8711, y = sin_1_cast_fp16)[name = string("op_8712")]; tensor query_states_61 = add(x = var_8687, y = var_8712)[name = string("query_states_61")]; tensor var_8715 = mul(x = var_8684_cast_fp16, y = cos_1_cast_fp16)[name = string("op_8715")]; tensor x1_63_begin_0 = const()[name = string("x1_63_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_63_end_0 = const()[name = string("x1_63_end_0"), val = tensor([1, 8, 1, 64])]; tensor x1_63_end_mask_0 = const()[name = string("x1_63_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_63 = slice_by_index(begin = x1_63_begin_0, end = x1_63_end_0, end_mask = x1_63_end_mask_0, x = var_8684_cast_fp16)[name = string("x1_63")]; tensor x2_63_begin_0 = const()[name = string("x2_63_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_63_end_0 = const()[name = string("x2_63_end_0"), val = tensor([1, 8, 1, 128])]; tensor x2_63_end_mask_0 = const()[name = string("x2_63_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_63 = slice_by_index(begin = x2_63_begin_0, end = x2_63_end_0, end_mask = x2_63_end_mask_0, x = var_8684_cast_fp16)[name = string("x2_63")]; fp16 const_278_promoted = const()[name = string("const_278_promoted"), val = fp16(-0x1p+0)]; tensor var_8736 = mul(x = x2_63, y = const_278_promoted)[name = string("op_8736")]; int32 var_8738 = const()[name = string("op_8738"), val = int32(-1)]; bool var_8739_interleave_0 = const()[name = string("op_8739_interleave_0"), val = bool(false)]; tensor var_8739 = concat(axis = var_8738, interleave = var_8739_interleave_0, values = (var_8736, x1_63))[name = string("op_8739")]; tensor var_8740 = mul(x = var_8739, y = sin_1_cast_fp16)[name = string("op_8740")]; tensor key_states_61 = add(x = var_8715, y = var_8740)[name = string("key_states_61")]; tensor expand_dims_180 = const()[name = string("expand_dims_180"), val = tensor([15])]; tensor expand_dims_181 = const()[name = string("expand_dims_181"), val = tensor([0])]; tensor expand_dims_183 = const()[name = string("expand_dims_183"), val = tensor([0])]; tensor expand_dims_184 = const()[name = string("expand_dims_184"), val = tensor([16])]; int32 concat_122_axis_0 = const()[name = string("concat_122_axis_0"), val = int32(0)]; bool concat_122_interleave_0 = const()[name = string("concat_122_interleave_0"), val = bool(false)]; tensor concat_122 = concat(axis = concat_122_axis_0, interleave = concat_122_interleave_0, values = (expand_dims_180, expand_dims_181, current_pos, expand_dims_183))[name = string("concat_122")]; tensor concat_123_values1_0 = const()[name = string("concat_123_values1_0"), val = tensor([0])]; tensor concat_123_values3_0 = const()[name = string("concat_123_values3_0"), val = tensor([0])]; int32 concat_123_axis_0 = const()[name = string("concat_123_axis_0"), val = int32(0)]; bool concat_123_interleave_0 = const()[name = string("concat_123_interleave_0"), val = bool(false)]; tensor concat_123 = concat(axis = concat_123_axis_0, interleave = concat_123_interleave_0, values = (expand_dims_184, concat_123_values1_0, var_1725, concat_123_values3_0))[name = string("concat_123")]; tensor model_model_kv_cache_0_internal_tensor_assign_31_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_31_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_31_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_31_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_31_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_31_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_31_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_31_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_31_cast_fp16 = slice_update(begin = concat_122, begin_mask = model_model_kv_cache_0_internal_tensor_assign_31_begin_mask_0, end = concat_123, end_mask = model_model_kv_cache_0_internal_tensor_assign_31_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_31_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_31_stride_0, update = key_states_61, x = coreml_update_state_85)[name = string("model_model_kv_cache_0_internal_tensor_assign_31_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_31_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_30_write_state")]; tensor coreml_update_state_86 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_30")]; tensor expand_dims_186 = const()[name = string("expand_dims_186"), val = tensor([43])]; tensor expand_dims_187 = const()[name = string("expand_dims_187"), val = tensor([0])]; tensor expand_dims_189 = const()[name = string("expand_dims_189"), val = tensor([0])]; tensor expand_dims_190 = const()[name = string("expand_dims_190"), val = tensor([44])]; int32 concat_126_axis_0 = const()[name = string("concat_126_axis_0"), val = int32(0)]; bool concat_126_interleave_0 = const()[name = string("concat_126_interleave_0"), val = bool(false)]; tensor concat_126 = concat(axis = concat_126_axis_0, interleave = concat_126_interleave_0, values = (expand_dims_186, expand_dims_187, current_pos, expand_dims_189))[name = string("concat_126")]; tensor concat_127_values1_0 = const()[name = string("concat_127_values1_0"), val = tensor([0])]; tensor concat_127_values3_0 = const()[name = string("concat_127_values3_0"), val = tensor([0])]; int32 concat_127_axis_0 = const()[name = string("concat_127_axis_0"), val = int32(0)]; bool concat_127_interleave_0 = const()[name = string("concat_127_interleave_0"), val = bool(false)]; tensor concat_127 = concat(axis = concat_127_axis_0, interleave = concat_127_interleave_0, values = (expand_dims_190, concat_127_values1_0, var_1725, concat_127_values3_0))[name = string("concat_127")]; tensor model_model_kv_cache_0_internal_tensor_assign_32_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_32_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_32_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_32_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_32_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_32_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_32_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_32_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_32_cast_fp16 = slice_update(begin = concat_126, begin_mask = model_model_kv_cache_0_internal_tensor_assign_32_begin_mask_0, end = concat_127, end_mask = model_model_kv_cache_0_internal_tensor_assign_32_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_32_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_32_stride_0, update = var_8645, x = coreml_update_state_86)[name = string("model_model_kv_cache_0_internal_tensor_assign_32_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_32_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_31_write_state")]; tensor coreml_update_state_87 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_31")]; tensor var_8795_begin_0 = const()[name = string("op_8795_begin_0"), val = tensor([15, 0, 0, 0])]; tensor var_8795_end_0 = const()[name = string("op_8795_end_0"), val = tensor([16, 8, 1024, 128])]; tensor var_8795_end_mask_0 = const()[name = string("op_8795_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_8795_cast_fp16 = slice_by_index(begin = var_8795_begin_0, end = var_8795_end_0, end_mask = var_8795_end_mask_0, x = coreml_update_state_87)[name = string("op_8795_cast_fp16")]; tensor K_layer_cache_31_axes_0 = const()[name = string("K_layer_cache_31_axes_0"), val = tensor([0])]; tensor K_layer_cache_31_cast_fp16 = squeeze(axes = K_layer_cache_31_axes_0, x = var_8795_cast_fp16)[name = string("K_layer_cache_31_cast_fp16")]; tensor var_8802_begin_0 = const()[name = string("op_8802_begin_0"), val = tensor([43, 0, 0, 0])]; tensor var_8802_end_0 = const()[name = string("op_8802_end_0"), val = tensor([44, 8, 1024, 128])]; tensor var_8802_end_mask_0 = const()[name = string("op_8802_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_8802_cast_fp16 = slice_by_index(begin = var_8802_begin_0, end = var_8802_end_0, end_mask = var_8802_end_mask_0, x = coreml_update_state_87)[name = string("op_8802_cast_fp16")]; tensor V_layer_cache_31_axes_0 = const()[name = string("V_layer_cache_31_axes_0"), val = tensor([0])]; tensor V_layer_cache_31_cast_fp16 = squeeze(axes = V_layer_cache_31_axes_0, x = var_8802_cast_fp16)[name = string("V_layer_cache_31_cast_fp16")]; tensor x_307_axes_0 = const()[name = string("x_307_axes_0"), val = tensor([1])]; tensor x_307_cast_fp16 = expand_dims(axes = x_307_axes_0, x = K_layer_cache_31_cast_fp16)[name = string("x_307_cast_fp16")]; tensor var_8839 = const()[name = string("op_8839"), val = tensor([1, 2, 1, 1])]; tensor x_309_cast_fp16 = tile(reps = var_8839, x = x_307_cast_fp16)[name = string("x_309_cast_fp16")]; tensor var_8851 = const()[name = string("op_8851"), val = tensor([1, -1, 1024, 128])]; tensor key_states_63_cast_fp16 = reshape(shape = var_8851, x = x_309_cast_fp16)[name = string("key_states_63_cast_fp16")]; tensor x_313_axes_0 = const()[name = string("x_313_axes_0"), val = tensor([1])]; tensor x_313_cast_fp16 = expand_dims(axes = x_313_axes_0, x = V_layer_cache_31_cast_fp16)[name = string("x_313_cast_fp16")]; tensor var_8859 = const()[name = string("op_8859"), val = tensor([1, 2, 1, 1])]; tensor x_315_cast_fp16 = tile(reps = var_8859, x = x_313_cast_fp16)[name = string("x_315_cast_fp16")]; tensor var_8871 = const()[name = string("op_8871"), val = tensor([1, -1, 1024, 128])]; tensor value_states_93_cast_fp16 = reshape(shape = var_8871, x = x_315_cast_fp16)[name = string("value_states_93_cast_fp16")]; bool var_8886_transpose_x_1 = const()[name = string("op_8886_transpose_x_1"), val = bool(false)]; bool var_8886_transpose_y_1 = const()[name = string("op_8886_transpose_y_1"), val = bool(true)]; tensor var_8886 = matmul(transpose_x = var_8886_transpose_x_1, transpose_y = var_8886_transpose_y_1, x = query_states_61, y = key_states_63_cast_fp16)[name = string("op_8886")]; fp16 var_8887_to_fp16 = const()[name = string("op_8887_to_fp16"), val = fp16(0x1.6ap-4)]; tensor attn_weights_91_cast_fp16 = mul(x = var_8886, y = var_8887_to_fp16)[name = string("attn_weights_91_cast_fp16")]; tensor attn_weights_93_cast_fp16 = add(x = attn_weights_91_cast_fp16, y = causal_mask)[name = string("attn_weights_93_cast_fp16")]; int32 var_8922 = const()[name = string("op_8922"), val = int32(-1)]; tensor attn_weights_95_cast_fp16 = softmax(axis = var_8922, x = attn_weights_93_cast_fp16)[name = string("attn_weights_95_cast_fp16")]; bool attn_output_151_transpose_x_0 = const()[name = string("attn_output_151_transpose_x_0"), val = bool(false)]; bool attn_output_151_transpose_y_0 = const()[name = string("attn_output_151_transpose_y_0"), val = bool(false)]; tensor attn_output_151_cast_fp16 = matmul(transpose_x = attn_output_151_transpose_x_0, transpose_y = attn_output_151_transpose_y_0, x = attn_weights_95_cast_fp16, y = value_states_93_cast_fp16)[name = string("attn_output_151_cast_fp16")]; tensor var_8933_perm_0 = const()[name = string("op_8933_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_8937 = const()[name = string("op_8937"), val = tensor([1, 1, 2048])]; tensor var_8933_cast_fp16 = transpose(perm = var_8933_perm_0, x = attn_output_151_cast_fp16)[name = string("transpose_76")]; tensor attn_output_155_cast_fp16 = reshape(shape = var_8937, x = var_8933_cast_fp16)[name = string("attn_output_155_cast_fp16")]; tensor var_8942 = const()[name = string("op_8942"), val = tensor([0, 2, 1])]; string var_8958_pad_type_0 = const()[name = string("op_8958_pad_type_0"), val = string("valid")]; int32 var_8958_groups_0 = const()[name = string("op_8958_groups_0"), val = int32(1)]; tensor var_8958_strides_0 = const()[name = string("op_8958_strides_0"), val = tensor([1])]; tensor var_8958_pad_0 = const()[name = string("op_8958_pad_0"), val = tensor([0, 0])]; tensor var_8958_dilations_0 = const()[name = string("op_8958_dilations_0"), val = tensor([1])]; tensor squeeze_15_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1058536448))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1061682240))))[name = string("squeeze_15_cast_fp16_to_fp32_to_fp16_palettized")]; tensor var_8943_cast_fp16 = transpose(perm = var_8942, x = attn_output_155_cast_fp16)[name = string("transpose_75")]; tensor var_8958_cast_fp16 = conv(dilations = var_8958_dilations_0, groups = var_8958_groups_0, pad = var_8958_pad_0, pad_type = var_8958_pad_type_0, strides = var_8958_strides_0, weight = squeeze_15_cast_fp16_to_fp32_to_fp16_palettized, x = var_8943_cast_fp16)[name = string("op_8958_cast_fp16")]; tensor var_8962 = const()[name = string("op_8962"), val = tensor([0, 2, 1])]; tensor attn_output_159_cast_fp16 = transpose(perm = var_8962, x = var_8958_cast_fp16)[name = string("transpose_74")]; tensor hidden_states_95_cast_fp16 = add(x = hidden_states_91_cast_fp16, y = attn_output_159_cast_fp16)[name = string("hidden_states_95_cast_fp16")]; tensor mean_127_axes_0 = const()[name = string("mean_127_axes_0"), val = tensor([-1])]; bool mean_127_keep_dims_0 = const()[name = string("mean_127_keep_dims_0"), val = bool(true)]; tensor mean_127_cast_fp16 = reduce_mean(axes = mean_127_axes_0, keep_dims = mean_127_keep_dims_0, x = hidden_states_95_cast_fp16)[name = string("mean_127_cast_fp16")]; tensor input_281_cast_fp16 = sub(x = hidden_states_95_cast_fp16, y = mean_127_cast_fp16)[name = string("input_281_cast_fp16")]; tensor var_8981_axes_0 = const()[name = string("op_8981_axes_0"), val = tensor([-1])]; tensor model_model_layers_15_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_15_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1061715072)))]; fp16 var_8969_to_fp16 = const()[name = string("op_8969_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_8981_cast_fp16 = layer_norm(axes = var_8981_axes_0, epsilon = var_8969_to_fp16, gamma = model_model_layers_15_post_attention_layernorm_weight_to_fp16, x = input_281_cast_fp16)[name = string("op_8981_cast_fp16")]; tensor var_8995 = const()[name = string("op_8995"), val = tensor([0, 2, 1])]; tensor input_283_axes_0 = const()[name = string("input_283_axes_0"), val = tensor([2])]; tensor var_8996 = transpose(perm = var_8995, x = var_8981_cast_fp16)[name = string("transpose_73")]; tensor input_283 = expand_dims(axes = input_283_axes_0, x = var_8996)[name = string("input_283")]; string input_285_pad_type_0 = const()[name = string("input_285_pad_type_0"), val = string("valid")]; tensor input_285_strides_0 = const()[name = string("input_285_strides_0"), val = tensor([1, 1])]; tensor input_285_pad_0 = const()[name = string("input_285_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_285_dilations_0 = const()[name = string("input_285_dilations_0"), val = tensor([1, 1])]; int32 input_285_groups_0 = const()[name = string("input_285_groups_0"), val = int32(1)]; tensor input_285 = conv(dilations = input_285_dilations_0, groups = input_285_groups_0, pad = input_285_pad_0, pad_type = input_285_pad_type_0, strides = input_285_strides_0, weight = model_model_layers_15_mlp_gate_proj_weight_palettized, x = input_283)[name = string("input_285")]; string b_31_pad_type_0 = const()[name = string("b_31_pad_type_0"), val = string("valid")]; tensor b_31_strides_0 = const()[name = string("b_31_strides_0"), val = tensor([1, 1])]; tensor b_31_pad_0 = const()[name = string("b_31_pad_0"), val = tensor([0, 0, 0, 0])]; tensor b_31_dilations_0 = const()[name = string("b_31_dilations_0"), val = tensor([1, 1])]; int32 b_31_groups_0 = const()[name = string("b_31_groups_0"), val = int32(1)]; tensor b_31 = conv(dilations = b_31_dilations_0, groups = b_31_groups_0, pad = b_31_pad_0, pad_type = b_31_pad_type_0, strides = b_31_strides_0, weight = model_model_layers_15_mlp_up_proj_weight_palettized, x = input_283)[name = string("b_31")]; tensor c_31 = silu(x = input_285)[name = string("c_31")]; tensor input_287 = mul(x = c_31, y = b_31)[name = string("input_287")]; string e_31_pad_type_0 = const()[name = string("e_31_pad_type_0"), val = string("valid")]; tensor e_31_strides_0 = const()[name = string("e_31_strides_0"), val = tensor([1, 1])]; tensor e_31_pad_0 = const()[name = string("e_31_pad_0"), val = tensor([0, 0, 0, 0])]; tensor e_31_dilations_0 = const()[name = string("e_31_dilations_0"), val = tensor([1, 1])]; int32 e_31_groups_0 = const()[name = string("e_31_groups_0"), val = int32(1)]; tensor e_31 = conv(dilations = e_31_dilations_0, groups = e_31_groups_0, pad = e_31_pad_0, pad_type = e_31_pad_type_0, strides = e_31_strides_0, weight = model_model_layers_15_mlp_down_proj_weight_palettized, x = input_287)[name = string("e_31")]; tensor var_9018_axes_0 = const()[name = string("op_9018_axes_0"), val = tensor([2])]; tensor var_9018 = squeeze(axes = var_9018_axes_0, x = e_31)[name = string("op_9018")]; tensor var_9019 = const()[name = string("op_9019"), val = tensor([0, 2, 1])]; tensor var_9020 = transpose(perm = var_9019, x = var_9018)[name = string("transpose_72")]; tensor hidden_states_97_cast_fp16 = add(x = hidden_states_95_cast_fp16, y = var_9020)[name = string("hidden_states_97_cast_fp16")]; tensor mean_129_axes_0 = const()[name = string("mean_129_axes_0"), val = tensor([-1])]; bool mean_129_keep_dims_0 = const()[name = string("mean_129_keep_dims_0"), val = bool(true)]; tensor mean_129_cast_fp16 = reduce_mean(axes = mean_129_axes_0, keep_dims = mean_129_keep_dims_0, x = hidden_states_97_cast_fp16)[name = string("mean_129_cast_fp16")]; tensor input_289_cast_fp16 = sub(x = hidden_states_97_cast_fp16, y = mean_129_cast_fp16)[name = string("input_289_cast_fp16")]; tensor var_9038_axes_0 = const()[name = string("op_9038_axes_0"), val = tensor([-1])]; tensor model_model_layers_16_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_16_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1061719232)))]; fp16 var_9026_to_fp16 = const()[name = string("op_9026_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_9038_cast_fp16 = layer_norm(axes = var_9038_axes_0, epsilon = var_9026_to_fp16, gamma = model_model_layers_16_input_layernorm_weight_to_fp16, x = input_289_cast_fp16)[name = string("op_9038_cast_fp16")]; tensor var_9044 = const()[name = string("op_9044"), val = tensor([0, 2, 1])]; tensor var_9047_axes_0 = const()[name = string("op_9047_axes_0"), val = tensor([2])]; tensor var_9045 = transpose(perm = var_9044, x = var_9038_cast_fp16)[name = string("transpose_71")]; tensor var_9047 = expand_dims(axes = var_9047_axes_0, x = var_9045)[name = string("op_9047")]; string var_9063_pad_type_0 = const()[name = string("op_9063_pad_type_0"), val = string("valid")]; tensor var_9063_strides_0 = const()[name = string("op_9063_strides_0"), val = tensor([1, 1])]; tensor var_9063_pad_0 = const()[name = string("op_9063_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_9063_dilations_0 = const()[name = string("op_9063_dilations_0"), val = tensor([1, 1])]; int32 var_9063_groups_0 = const()[name = string("op_9063_groups_0"), val = int32(1)]; tensor var_9063 = conv(dilations = var_9063_dilations_0, groups = var_9063_groups_0, pad = var_9063_pad_0, pad_type = var_9063_pad_type_0, strides = var_9063_strides_0, weight = model_model_layers_16_self_attn_q_proj_weight_palettized, x = var_9047)[name = string("op_9063")]; tensor var_9068 = const()[name = string("op_9068"), val = tensor([1, 16, 1, 128])]; tensor var_9069 = reshape(shape = var_9068, x = var_9063)[name = string("op_9069")]; string var_9085_pad_type_0 = const()[name = string("op_9085_pad_type_0"), val = string("valid")]; tensor var_9085_strides_0 = const()[name = string("op_9085_strides_0"), val = tensor([1, 1])]; tensor var_9085_pad_0 = const()[name = string("op_9085_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_9085_dilations_0 = const()[name = string("op_9085_dilations_0"), val = tensor([1, 1])]; int32 var_9085_groups_0 = const()[name = string("op_9085_groups_0"), val = int32(1)]; tensor var_9085 = conv(dilations = var_9085_dilations_0, groups = var_9085_groups_0, pad = var_9085_pad_0, pad_type = var_9085_pad_type_0, strides = var_9085_strides_0, weight = model_model_layers_16_self_attn_k_proj_weight_palettized, x = var_9047)[name = string("op_9085")]; tensor var_9090 = const()[name = string("op_9090"), val = tensor([1, 8, 1, 128])]; tensor var_9091 = reshape(shape = var_9090, x = var_9085)[name = string("op_9091")]; string var_9107_pad_type_0 = const()[name = string("op_9107_pad_type_0"), val = string("valid")]; tensor var_9107_strides_0 = const()[name = string("op_9107_strides_0"), val = tensor([1, 1])]; tensor var_9107_pad_0 = const()[name = string("op_9107_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_9107_dilations_0 = const()[name = string("op_9107_dilations_0"), val = tensor([1, 1])]; int32 var_9107_groups_0 = const()[name = string("op_9107_groups_0"), val = int32(1)]; tensor var_9107 = conv(dilations = var_9107_dilations_0, groups = var_9107_groups_0, pad = var_9107_pad_0, pad_type = var_9107_pad_type_0, strides = var_9107_strides_0, weight = model_model_layers_16_self_attn_v_proj_weight_palettized, x = var_9047)[name = string("op_9107")]; tensor var_9112 = const()[name = string("op_9112"), val = tensor([1, 8, 1, 128])]; tensor var_9113 = reshape(shape = var_9112, x = var_9107)[name = string("op_9113")]; tensor mean_131_axes_0 = const()[name = string("mean_131_axes_0"), val = tensor([-1])]; bool mean_131_keep_dims_0 = const()[name = string("mean_131_keep_dims_0"), val = bool(true)]; tensor mean_131 = reduce_mean(axes = mean_131_axes_0, keep_dims = mean_131_keep_dims_0, x = var_9069)[name = string("mean_131")]; tensor input_293 = sub(x = var_9069, y = mean_131)[name = string("input_293")]; tensor var_9134_axes_0 = const()[name = string("op_9134_axes_0"), val = tensor([-1])]; tensor model_model_layers_16_self_attn_q_norm_weight_to_fp16 = const()[name = string("model_model_layers_16_self_attn_q_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1061723392)))]; fp16 var_9122_to_fp16 = const()[name = string("op_9122_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_9134_cast_fp16 = layer_norm(axes = var_9134_axes_0, epsilon = var_9122_to_fp16, gamma = model_model_layers_16_self_attn_q_norm_weight_to_fp16, x = input_293)[name = string("op_9134_cast_fp16")]; tensor mean_133_axes_0 = const()[name = string("mean_133_axes_0"), val = tensor([-1])]; bool mean_133_keep_dims_0 = const()[name = string("mean_133_keep_dims_0"), val = bool(true)]; tensor mean_133 = reduce_mean(axes = mean_133_axes_0, keep_dims = mean_133_keep_dims_0, x = var_9091)[name = string("mean_133")]; tensor input_295 = sub(x = var_9091, y = mean_133)[name = string("input_295")]; tensor var_9152_axes_0 = const()[name = string("op_9152_axes_0"), val = tensor([-1])]; tensor model_model_layers_16_self_attn_k_norm_weight_to_fp16 = const()[name = string("model_model_layers_16_self_attn_k_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1061723712)))]; fp16 var_9140_to_fp16 = const()[name = string("op_9140_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_9152_cast_fp16 = layer_norm(axes = var_9152_axes_0, epsilon = var_9140_to_fp16, gamma = model_model_layers_16_self_attn_k_norm_weight_to_fp16, x = input_295)[name = string("op_9152_cast_fp16")]; tensor var_9155 = mul(x = var_9134_cast_fp16, y = cos_1_cast_fp16)[name = string("op_9155")]; tensor x1_65_begin_0 = const()[name = string("x1_65_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_65_end_0 = const()[name = string("x1_65_end_0"), val = tensor([1, 16, 1, 64])]; tensor x1_65_end_mask_0 = const()[name = string("x1_65_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_65 = slice_by_index(begin = x1_65_begin_0, end = x1_65_end_0, end_mask = x1_65_end_mask_0, x = var_9134_cast_fp16)[name = string("x1_65")]; tensor x2_65_begin_0 = const()[name = string("x2_65_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_65_end_0 = const()[name = string("x2_65_end_0"), val = tensor([1, 16, 1, 128])]; tensor x2_65_end_mask_0 = const()[name = string("x2_65_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_65 = slice_by_index(begin = x2_65_begin_0, end = x2_65_end_0, end_mask = x2_65_end_mask_0, x = var_9134_cast_fp16)[name = string("x2_65")]; fp16 const_293_promoted = const()[name = string("const_293_promoted"), val = fp16(-0x1p+0)]; tensor var_9176 = mul(x = x2_65, y = const_293_promoted)[name = string("op_9176")]; int32 var_9178 = const()[name = string("op_9178"), val = int32(-1)]; bool var_9179_interleave_0 = const()[name = string("op_9179_interleave_0"), val = bool(false)]; tensor var_9179 = concat(axis = var_9178, interleave = var_9179_interleave_0, values = (var_9176, x1_65))[name = string("op_9179")]; tensor var_9180 = mul(x = var_9179, y = sin_1_cast_fp16)[name = string("op_9180")]; tensor query_states_65 = add(x = var_9155, y = var_9180)[name = string("query_states_65")]; tensor var_9183 = mul(x = var_9152_cast_fp16, y = cos_1_cast_fp16)[name = string("op_9183")]; tensor x1_67_begin_0 = const()[name = string("x1_67_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_67_end_0 = const()[name = string("x1_67_end_0"), val = tensor([1, 8, 1, 64])]; tensor x1_67_end_mask_0 = const()[name = string("x1_67_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_67 = slice_by_index(begin = x1_67_begin_0, end = x1_67_end_0, end_mask = x1_67_end_mask_0, x = var_9152_cast_fp16)[name = string("x1_67")]; tensor x2_67_begin_0 = const()[name = string("x2_67_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_67_end_0 = const()[name = string("x2_67_end_0"), val = tensor([1, 8, 1, 128])]; tensor x2_67_end_mask_0 = const()[name = string("x2_67_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_67 = slice_by_index(begin = x2_67_begin_0, end = x2_67_end_0, end_mask = x2_67_end_mask_0, x = var_9152_cast_fp16)[name = string("x2_67")]; fp16 const_296_promoted = const()[name = string("const_296_promoted"), val = fp16(-0x1p+0)]; tensor var_9204 = mul(x = x2_67, y = const_296_promoted)[name = string("op_9204")]; int32 var_9206 = const()[name = string("op_9206"), val = int32(-1)]; bool var_9207_interleave_0 = const()[name = string("op_9207_interleave_0"), val = bool(false)]; tensor var_9207 = concat(axis = var_9206, interleave = var_9207_interleave_0, values = (var_9204, x1_67))[name = string("op_9207")]; tensor var_9208 = mul(x = var_9207, y = sin_1_cast_fp16)[name = string("op_9208")]; tensor key_states_65 = add(x = var_9183, y = var_9208)[name = string("key_states_65")]; tensor expand_dims_192 = const()[name = string("expand_dims_192"), val = tensor([16])]; tensor expand_dims_193 = const()[name = string("expand_dims_193"), val = tensor([0])]; tensor expand_dims_195 = const()[name = string("expand_dims_195"), val = tensor([0])]; tensor expand_dims_196 = const()[name = string("expand_dims_196"), val = tensor([17])]; int32 concat_130_axis_0 = const()[name = string("concat_130_axis_0"), val = int32(0)]; bool concat_130_interleave_0 = const()[name = string("concat_130_interleave_0"), val = bool(false)]; tensor concat_130 = concat(axis = concat_130_axis_0, interleave = concat_130_interleave_0, values = (expand_dims_192, expand_dims_193, current_pos, expand_dims_195))[name = string("concat_130")]; tensor concat_131_values1_0 = const()[name = string("concat_131_values1_0"), val = tensor([0])]; tensor concat_131_values3_0 = const()[name = string("concat_131_values3_0"), val = tensor([0])]; int32 concat_131_axis_0 = const()[name = string("concat_131_axis_0"), val = int32(0)]; bool concat_131_interleave_0 = const()[name = string("concat_131_interleave_0"), val = bool(false)]; tensor concat_131 = concat(axis = concat_131_axis_0, interleave = concat_131_interleave_0, values = (expand_dims_196, concat_131_values1_0, var_1725, concat_131_values3_0))[name = string("concat_131")]; tensor model_model_kv_cache_0_internal_tensor_assign_33_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_33_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_33_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_33_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_33_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_33_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_33_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_33_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_33_cast_fp16 = slice_update(begin = concat_130, begin_mask = model_model_kv_cache_0_internal_tensor_assign_33_begin_mask_0, end = concat_131, end_mask = model_model_kv_cache_0_internal_tensor_assign_33_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_33_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_33_stride_0, update = key_states_65, x = coreml_update_state_87)[name = string("model_model_kv_cache_0_internal_tensor_assign_33_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_33_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_32_write_state")]; tensor coreml_update_state_88 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_32")]; tensor expand_dims_198 = const()[name = string("expand_dims_198"), val = tensor([44])]; tensor expand_dims_199 = const()[name = string("expand_dims_199"), val = tensor([0])]; tensor expand_dims_201 = const()[name = string("expand_dims_201"), val = tensor([0])]; tensor expand_dims_202 = const()[name = string("expand_dims_202"), val = tensor([45])]; int32 concat_134_axis_0 = const()[name = string("concat_134_axis_0"), val = int32(0)]; bool concat_134_interleave_0 = const()[name = string("concat_134_interleave_0"), val = bool(false)]; tensor concat_134 = concat(axis = concat_134_axis_0, interleave = concat_134_interleave_0, values = (expand_dims_198, expand_dims_199, current_pos, expand_dims_201))[name = string("concat_134")]; tensor concat_135_values1_0 = const()[name = string("concat_135_values1_0"), val = tensor([0])]; tensor concat_135_values3_0 = const()[name = string("concat_135_values3_0"), val = tensor([0])]; int32 concat_135_axis_0 = const()[name = string("concat_135_axis_0"), val = int32(0)]; bool concat_135_interleave_0 = const()[name = string("concat_135_interleave_0"), val = bool(false)]; tensor concat_135 = concat(axis = concat_135_axis_0, interleave = concat_135_interleave_0, values = (expand_dims_202, concat_135_values1_0, var_1725, concat_135_values3_0))[name = string("concat_135")]; tensor model_model_kv_cache_0_internal_tensor_assign_34_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_34_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_34_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_34_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_34_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_34_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_34_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_34_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_34_cast_fp16 = slice_update(begin = concat_134, begin_mask = model_model_kv_cache_0_internal_tensor_assign_34_begin_mask_0, end = concat_135, end_mask = model_model_kv_cache_0_internal_tensor_assign_34_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_34_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_34_stride_0, update = var_9113, x = coreml_update_state_88)[name = string("model_model_kv_cache_0_internal_tensor_assign_34_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_34_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_33_write_state")]; tensor coreml_update_state_89 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_33")]; tensor var_9263_begin_0 = const()[name = string("op_9263_begin_0"), val = tensor([16, 0, 0, 0])]; tensor var_9263_end_0 = const()[name = string("op_9263_end_0"), val = tensor([17, 8, 1024, 128])]; tensor var_9263_end_mask_0 = const()[name = string("op_9263_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_9263_cast_fp16 = slice_by_index(begin = var_9263_begin_0, end = var_9263_end_0, end_mask = var_9263_end_mask_0, x = coreml_update_state_89)[name = string("op_9263_cast_fp16")]; tensor K_layer_cache_33_axes_0 = const()[name = string("K_layer_cache_33_axes_0"), val = tensor([0])]; tensor K_layer_cache_33_cast_fp16 = squeeze(axes = K_layer_cache_33_axes_0, x = var_9263_cast_fp16)[name = string("K_layer_cache_33_cast_fp16")]; tensor var_9270_begin_0 = const()[name = string("op_9270_begin_0"), val = tensor([44, 0, 0, 0])]; tensor var_9270_end_0 = const()[name = string("op_9270_end_0"), val = tensor([45, 8, 1024, 128])]; tensor var_9270_end_mask_0 = const()[name = string("op_9270_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_9270_cast_fp16 = slice_by_index(begin = var_9270_begin_0, end = var_9270_end_0, end_mask = var_9270_end_mask_0, x = coreml_update_state_89)[name = string("op_9270_cast_fp16")]; tensor V_layer_cache_33_axes_0 = const()[name = string("V_layer_cache_33_axes_0"), val = tensor([0])]; tensor V_layer_cache_33_cast_fp16 = squeeze(axes = V_layer_cache_33_axes_0, x = var_9270_cast_fp16)[name = string("V_layer_cache_33_cast_fp16")]; tensor x_327_axes_0 = const()[name = string("x_327_axes_0"), val = tensor([1])]; tensor x_327_cast_fp16 = expand_dims(axes = x_327_axes_0, x = K_layer_cache_33_cast_fp16)[name = string("x_327_cast_fp16")]; tensor var_9307 = const()[name = string("op_9307"), val = tensor([1, 2, 1, 1])]; tensor x_329_cast_fp16 = tile(reps = var_9307, x = x_327_cast_fp16)[name = string("x_329_cast_fp16")]; tensor var_9319 = const()[name = string("op_9319"), val = tensor([1, -1, 1024, 128])]; tensor key_states_67_cast_fp16 = reshape(shape = var_9319, x = x_329_cast_fp16)[name = string("key_states_67_cast_fp16")]; tensor x_333_axes_0 = const()[name = string("x_333_axes_0"), val = tensor([1])]; tensor x_333_cast_fp16 = expand_dims(axes = x_333_axes_0, x = V_layer_cache_33_cast_fp16)[name = string("x_333_cast_fp16")]; tensor var_9327 = const()[name = string("op_9327"), val = tensor([1, 2, 1, 1])]; tensor x_335_cast_fp16 = tile(reps = var_9327, x = x_333_cast_fp16)[name = string("x_335_cast_fp16")]; tensor var_9339 = const()[name = string("op_9339"), val = tensor([1, -1, 1024, 128])]; tensor value_states_99_cast_fp16 = reshape(shape = var_9339, x = x_335_cast_fp16)[name = string("value_states_99_cast_fp16")]; bool var_9354_transpose_x_1 = const()[name = string("op_9354_transpose_x_1"), val = bool(false)]; bool var_9354_transpose_y_1 = const()[name = string("op_9354_transpose_y_1"), val = bool(true)]; tensor var_9354 = matmul(transpose_x = var_9354_transpose_x_1, transpose_y = var_9354_transpose_y_1, x = query_states_65, y = key_states_67_cast_fp16)[name = string("op_9354")]; fp16 var_9355_to_fp16 = const()[name = string("op_9355_to_fp16"), val = fp16(0x1.6ap-4)]; tensor attn_weights_97_cast_fp16 = mul(x = var_9354, y = var_9355_to_fp16)[name = string("attn_weights_97_cast_fp16")]; tensor attn_weights_99_cast_fp16 = add(x = attn_weights_97_cast_fp16, y = causal_mask)[name = string("attn_weights_99_cast_fp16")]; int32 var_9390 = const()[name = string("op_9390"), val = int32(-1)]; tensor attn_weights_101_cast_fp16 = softmax(axis = var_9390, x = attn_weights_99_cast_fp16)[name = string("attn_weights_101_cast_fp16")]; bool attn_output_161_transpose_x_0 = const()[name = string("attn_output_161_transpose_x_0"), val = bool(false)]; bool attn_output_161_transpose_y_0 = const()[name = string("attn_output_161_transpose_y_0"), val = bool(false)]; tensor attn_output_161_cast_fp16 = matmul(transpose_x = attn_output_161_transpose_x_0, transpose_y = attn_output_161_transpose_y_0, x = attn_weights_101_cast_fp16, y = value_states_99_cast_fp16)[name = string("attn_output_161_cast_fp16")]; tensor var_9401_perm_0 = const()[name = string("op_9401_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_9405 = const()[name = string("op_9405"), val = tensor([1, 1, 2048])]; tensor var_9401_cast_fp16 = transpose(perm = var_9401_perm_0, x = attn_output_161_cast_fp16)[name = string("transpose_70")]; tensor attn_output_165_cast_fp16 = reshape(shape = var_9405, x = var_9401_cast_fp16)[name = string("attn_output_165_cast_fp16")]; tensor var_9410 = const()[name = string("op_9410"), val = tensor([0, 2, 1])]; string var_9426_pad_type_0 = const()[name = string("op_9426_pad_type_0"), val = string("valid")]; int32 var_9426_groups_0 = const()[name = string("op_9426_groups_0"), val = int32(1)]; tensor var_9426_strides_0 = const()[name = string("op_9426_strides_0"), val = tensor([1])]; tensor var_9426_pad_0 = const()[name = string("op_9426_pad_0"), val = tensor([0, 0])]; tensor var_9426_dilations_0 = const()[name = string("op_9426_dilations_0"), val = tensor([1])]; tensor squeeze_16_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1061724032))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1064869824))))[name = string("squeeze_16_cast_fp16_to_fp32_to_fp16_palettized")]; tensor var_9411_cast_fp16 = transpose(perm = var_9410, x = attn_output_165_cast_fp16)[name = string("transpose_69")]; tensor var_9426_cast_fp16 = conv(dilations = var_9426_dilations_0, groups = var_9426_groups_0, pad = var_9426_pad_0, pad_type = var_9426_pad_type_0, strides = var_9426_strides_0, weight = squeeze_16_cast_fp16_to_fp32_to_fp16_palettized, x = var_9411_cast_fp16)[name = string("op_9426_cast_fp16")]; tensor var_9430 = const()[name = string("op_9430"), val = tensor([0, 2, 1])]; tensor attn_output_169_cast_fp16 = transpose(perm = var_9430, x = var_9426_cast_fp16)[name = string("transpose_68")]; tensor hidden_states_101_cast_fp16 = add(x = hidden_states_97_cast_fp16, y = attn_output_169_cast_fp16)[name = string("hidden_states_101_cast_fp16")]; tensor mean_135_axes_0 = const()[name = string("mean_135_axes_0"), val = tensor([-1])]; bool mean_135_keep_dims_0 = const()[name = string("mean_135_keep_dims_0"), val = bool(true)]; tensor mean_135_cast_fp16 = reduce_mean(axes = mean_135_axes_0, keep_dims = mean_135_keep_dims_0, x = hidden_states_101_cast_fp16)[name = string("mean_135_cast_fp16")]; tensor input_299_cast_fp16 = sub(x = hidden_states_101_cast_fp16, y = mean_135_cast_fp16)[name = string("input_299_cast_fp16")]; tensor var_9449_axes_0 = const()[name = string("op_9449_axes_0"), val = tensor([-1])]; tensor model_model_layers_16_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_16_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1064902656)))]; fp16 var_9437_to_fp16 = const()[name = string("op_9437_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_9449_cast_fp16 = layer_norm(axes = var_9449_axes_0, epsilon = var_9437_to_fp16, gamma = model_model_layers_16_post_attention_layernorm_weight_to_fp16, x = input_299_cast_fp16)[name = string("op_9449_cast_fp16")]; tensor var_9463 = const()[name = string("op_9463"), val = tensor([0, 2, 1])]; tensor input_301_axes_0 = const()[name = string("input_301_axes_0"), val = tensor([2])]; tensor var_9464 = transpose(perm = var_9463, x = var_9449_cast_fp16)[name = string("transpose_67")]; tensor input_301 = expand_dims(axes = input_301_axes_0, x = var_9464)[name = string("input_301")]; string input_303_pad_type_0 = const()[name = string("input_303_pad_type_0"), val = string("valid")]; tensor input_303_strides_0 = const()[name = string("input_303_strides_0"), val = tensor([1, 1])]; tensor input_303_pad_0 = const()[name = string("input_303_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_303_dilations_0 = const()[name = string("input_303_dilations_0"), val = tensor([1, 1])]; int32 input_303_groups_0 = const()[name = string("input_303_groups_0"), val = int32(1)]; tensor input_303 = conv(dilations = input_303_dilations_0, groups = input_303_groups_0, pad = input_303_pad_0, pad_type = input_303_pad_type_0, strides = input_303_strides_0, weight = model_model_layers_16_mlp_gate_proj_weight_palettized, x = input_301)[name = string("input_303")]; string b_33_pad_type_0 = const()[name = string("b_33_pad_type_0"), val = string("valid")]; tensor b_33_strides_0 = const()[name = string("b_33_strides_0"), val = tensor([1, 1])]; tensor b_33_pad_0 = const()[name = string("b_33_pad_0"), val = tensor([0, 0, 0, 0])]; tensor b_33_dilations_0 = const()[name = string("b_33_dilations_0"), val = tensor([1, 1])]; int32 b_33_groups_0 = const()[name = string("b_33_groups_0"), val = int32(1)]; tensor b_33 = conv(dilations = b_33_dilations_0, groups = b_33_groups_0, pad = b_33_pad_0, pad_type = b_33_pad_type_0, strides = b_33_strides_0, weight = model_model_layers_16_mlp_up_proj_weight_palettized, x = input_301)[name = string("b_33")]; tensor c_33 = silu(x = input_303)[name = string("c_33")]; tensor input_305 = mul(x = c_33, y = b_33)[name = string("input_305")]; string e_33_pad_type_0 = const()[name = string("e_33_pad_type_0"), val = string("valid")]; tensor e_33_strides_0 = const()[name = string("e_33_strides_0"), val = tensor([1, 1])]; tensor e_33_pad_0 = const()[name = string("e_33_pad_0"), val = tensor([0, 0, 0, 0])]; tensor e_33_dilations_0 = const()[name = string("e_33_dilations_0"), val = tensor([1, 1])]; int32 e_33_groups_0 = const()[name = string("e_33_groups_0"), val = int32(1)]; tensor e_33 = conv(dilations = e_33_dilations_0, groups = e_33_groups_0, pad = e_33_pad_0, pad_type = e_33_pad_type_0, strides = e_33_strides_0, weight = model_model_layers_16_mlp_down_proj_weight_palettized, x = input_305)[name = string("e_33")]; tensor var_9486_axes_0 = const()[name = string("op_9486_axes_0"), val = tensor([2])]; tensor var_9486 = squeeze(axes = var_9486_axes_0, x = e_33)[name = string("op_9486")]; tensor var_9487 = const()[name = string("op_9487"), val = tensor([0, 2, 1])]; tensor var_9488 = transpose(perm = var_9487, x = var_9486)[name = string("transpose_66")]; tensor hidden_states_103_cast_fp16 = add(x = hidden_states_101_cast_fp16, y = var_9488)[name = string("hidden_states_103_cast_fp16")]; tensor mean_137_axes_0 = const()[name = string("mean_137_axes_0"), val = tensor([-1])]; bool mean_137_keep_dims_0 = const()[name = string("mean_137_keep_dims_0"), val = bool(true)]; tensor mean_137_cast_fp16 = reduce_mean(axes = mean_137_axes_0, keep_dims = mean_137_keep_dims_0, x = hidden_states_103_cast_fp16)[name = string("mean_137_cast_fp16")]; tensor input_307_cast_fp16 = sub(x = hidden_states_103_cast_fp16, y = mean_137_cast_fp16)[name = string("input_307_cast_fp16")]; tensor var_9506_axes_0 = const()[name = string("op_9506_axes_0"), val = tensor([-1])]; tensor model_model_layers_17_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_17_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1064906816)))]; fp16 var_9494_to_fp16 = const()[name = string("op_9494_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_9506_cast_fp16 = layer_norm(axes = var_9506_axes_0, epsilon = var_9494_to_fp16, gamma = model_model_layers_17_input_layernorm_weight_to_fp16, x = input_307_cast_fp16)[name = string("op_9506_cast_fp16")]; tensor var_9512 = const()[name = string("op_9512"), val = tensor([0, 2, 1])]; tensor var_9515_axes_0 = const()[name = string("op_9515_axes_0"), val = tensor([2])]; tensor var_9513 = transpose(perm = var_9512, x = var_9506_cast_fp16)[name = string("transpose_65")]; tensor var_9515 = expand_dims(axes = var_9515_axes_0, x = var_9513)[name = string("op_9515")]; string var_9531_pad_type_0 = const()[name = string("op_9531_pad_type_0"), val = string("valid")]; tensor var_9531_strides_0 = const()[name = string("op_9531_strides_0"), val = tensor([1, 1])]; tensor var_9531_pad_0 = const()[name = string("op_9531_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_9531_dilations_0 = const()[name = string("op_9531_dilations_0"), val = tensor([1, 1])]; int32 var_9531_groups_0 = const()[name = string("op_9531_groups_0"), val = int32(1)]; tensor var_9531 = conv(dilations = var_9531_dilations_0, groups = var_9531_groups_0, pad = var_9531_pad_0, pad_type = var_9531_pad_type_0, strides = var_9531_strides_0, weight = model_model_layers_17_self_attn_q_proj_weight_palettized, x = var_9515)[name = string("op_9531")]; tensor var_9536 = const()[name = string("op_9536"), val = tensor([1, 16, 1, 128])]; tensor var_9537 = reshape(shape = var_9536, x = var_9531)[name = string("op_9537")]; string var_9553_pad_type_0 = const()[name = string("op_9553_pad_type_0"), val = string("valid")]; tensor var_9553_strides_0 = const()[name = string("op_9553_strides_0"), val = tensor([1, 1])]; tensor var_9553_pad_0 = const()[name = string("op_9553_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_9553_dilations_0 = const()[name = string("op_9553_dilations_0"), val = tensor([1, 1])]; int32 var_9553_groups_0 = const()[name = string("op_9553_groups_0"), val = int32(1)]; tensor var_9553 = conv(dilations = var_9553_dilations_0, groups = var_9553_groups_0, pad = var_9553_pad_0, pad_type = var_9553_pad_type_0, strides = var_9553_strides_0, weight = model_model_layers_17_self_attn_k_proj_weight_palettized, x = var_9515)[name = string("op_9553")]; tensor var_9558 = const()[name = string("op_9558"), val = tensor([1, 8, 1, 128])]; tensor var_9559 = reshape(shape = var_9558, x = var_9553)[name = string("op_9559")]; string var_9575_pad_type_0 = const()[name = string("op_9575_pad_type_0"), val = string("valid")]; tensor var_9575_strides_0 = const()[name = string("op_9575_strides_0"), val = tensor([1, 1])]; tensor var_9575_pad_0 = const()[name = string("op_9575_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_9575_dilations_0 = const()[name = string("op_9575_dilations_0"), val = tensor([1, 1])]; int32 var_9575_groups_0 = const()[name = string("op_9575_groups_0"), val = int32(1)]; tensor var_9575 = conv(dilations = var_9575_dilations_0, groups = var_9575_groups_0, pad = var_9575_pad_0, pad_type = var_9575_pad_type_0, strides = var_9575_strides_0, weight = model_model_layers_17_self_attn_v_proj_weight_palettized, x = var_9515)[name = string("op_9575")]; tensor var_9580 = const()[name = string("op_9580"), val = tensor([1, 8, 1, 128])]; tensor var_9581 = reshape(shape = var_9580, x = var_9575)[name = string("op_9581")]; tensor mean_139_axes_0 = const()[name = string("mean_139_axes_0"), val = tensor([-1])]; bool mean_139_keep_dims_0 = const()[name = string("mean_139_keep_dims_0"), val = bool(true)]; tensor mean_139 = reduce_mean(axes = mean_139_axes_0, keep_dims = mean_139_keep_dims_0, x = var_9537)[name = string("mean_139")]; tensor input_311 = sub(x = var_9537, y = mean_139)[name = string("input_311")]; tensor var_9602_axes_0 = const()[name = string("op_9602_axes_0"), val = tensor([-1])]; tensor model_model_layers_17_self_attn_q_norm_weight_to_fp16 = const()[name = string("model_model_layers_17_self_attn_q_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1064910976)))]; fp16 var_9590_to_fp16 = const()[name = string("op_9590_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_9602_cast_fp16 = layer_norm(axes = var_9602_axes_0, epsilon = var_9590_to_fp16, gamma = model_model_layers_17_self_attn_q_norm_weight_to_fp16, x = input_311)[name = string("op_9602_cast_fp16")]; tensor mean_141_axes_0 = const()[name = string("mean_141_axes_0"), val = tensor([-1])]; bool mean_141_keep_dims_0 = const()[name = string("mean_141_keep_dims_0"), val = bool(true)]; tensor mean_141 = reduce_mean(axes = mean_141_axes_0, keep_dims = mean_141_keep_dims_0, x = var_9559)[name = string("mean_141")]; tensor input_313 = sub(x = var_9559, y = mean_141)[name = string("input_313")]; tensor var_9620_axes_0 = const()[name = string("op_9620_axes_0"), val = tensor([-1])]; tensor model_model_layers_17_self_attn_k_norm_weight_to_fp16 = const()[name = string("model_model_layers_17_self_attn_k_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1064911296)))]; fp16 var_9608_to_fp16 = const()[name = string("op_9608_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_9620_cast_fp16 = layer_norm(axes = var_9620_axes_0, epsilon = var_9608_to_fp16, gamma = model_model_layers_17_self_attn_k_norm_weight_to_fp16, x = input_313)[name = string("op_9620_cast_fp16")]; tensor var_9623 = mul(x = var_9602_cast_fp16, y = cos_1_cast_fp16)[name = string("op_9623")]; tensor x1_69_begin_0 = const()[name = string("x1_69_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_69_end_0 = const()[name = string("x1_69_end_0"), val = tensor([1, 16, 1, 64])]; tensor x1_69_end_mask_0 = const()[name = string("x1_69_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_69 = slice_by_index(begin = x1_69_begin_0, end = x1_69_end_0, end_mask = x1_69_end_mask_0, x = var_9602_cast_fp16)[name = string("x1_69")]; tensor x2_69_begin_0 = const()[name = string("x2_69_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_69_end_0 = const()[name = string("x2_69_end_0"), val = tensor([1, 16, 1, 128])]; tensor x2_69_end_mask_0 = const()[name = string("x2_69_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_69 = slice_by_index(begin = x2_69_begin_0, end = x2_69_end_0, end_mask = x2_69_end_mask_0, x = var_9602_cast_fp16)[name = string("x2_69")]; fp16 const_311_promoted = const()[name = string("const_311_promoted"), val = fp16(-0x1p+0)]; tensor var_9644 = mul(x = x2_69, y = const_311_promoted)[name = string("op_9644")]; int32 var_9646 = const()[name = string("op_9646"), val = int32(-1)]; bool var_9647_interleave_0 = const()[name = string("op_9647_interleave_0"), val = bool(false)]; tensor var_9647 = concat(axis = var_9646, interleave = var_9647_interleave_0, values = (var_9644, x1_69))[name = string("op_9647")]; tensor var_9648 = mul(x = var_9647, y = sin_1_cast_fp16)[name = string("op_9648")]; tensor query_states_69 = add(x = var_9623, y = var_9648)[name = string("query_states_69")]; tensor var_9651 = mul(x = var_9620_cast_fp16, y = cos_1_cast_fp16)[name = string("op_9651")]; tensor x1_71_begin_0 = const()[name = string("x1_71_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_71_end_0 = const()[name = string("x1_71_end_0"), val = tensor([1, 8, 1, 64])]; tensor x1_71_end_mask_0 = const()[name = string("x1_71_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_71 = slice_by_index(begin = x1_71_begin_0, end = x1_71_end_0, end_mask = x1_71_end_mask_0, x = var_9620_cast_fp16)[name = string("x1_71")]; tensor x2_71_begin_0 = const()[name = string("x2_71_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_71_end_0 = const()[name = string("x2_71_end_0"), val = tensor([1, 8, 1, 128])]; tensor x2_71_end_mask_0 = const()[name = string("x2_71_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_71 = slice_by_index(begin = x2_71_begin_0, end = x2_71_end_0, end_mask = x2_71_end_mask_0, x = var_9620_cast_fp16)[name = string("x2_71")]; fp16 const_314_promoted = const()[name = string("const_314_promoted"), val = fp16(-0x1p+0)]; tensor var_9672 = mul(x = x2_71, y = const_314_promoted)[name = string("op_9672")]; int32 var_9674 = const()[name = string("op_9674"), val = int32(-1)]; bool var_9675_interleave_0 = const()[name = string("op_9675_interleave_0"), val = bool(false)]; tensor var_9675 = concat(axis = var_9674, interleave = var_9675_interleave_0, values = (var_9672, x1_71))[name = string("op_9675")]; tensor var_9676 = mul(x = var_9675, y = sin_1_cast_fp16)[name = string("op_9676")]; tensor key_states_69 = add(x = var_9651, y = var_9676)[name = string("key_states_69")]; tensor expand_dims_204 = const()[name = string("expand_dims_204"), val = tensor([17])]; tensor expand_dims_205 = const()[name = string("expand_dims_205"), val = tensor([0])]; tensor expand_dims_207 = const()[name = string("expand_dims_207"), val = tensor([0])]; tensor expand_dims_208 = const()[name = string("expand_dims_208"), val = tensor([18])]; int32 concat_138_axis_0 = const()[name = string("concat_138_axis_0"), val = int32(0)]; bool concat_138_interleave_0 = const()[name = string("concat_138_interleave_0"), val = bool(false)]; tensor concat_138 = concat(axis = concat_138_axis_0, interleave = concat_138_interleave_0, values = (expand_dims_204, expand_dims_205, current_pos, expand_dims_207))[name = string("concat_138")]; tensor concat_139_values1_0 = const()[name = string("concat_139_values1_0"), val = tensor([0])]; tensor concat_139_values3_0 = const()[name = string("concat_139_values3_0"), val = tensor([0])]; int32 concat_139_axis_0 = const()[name = string("concat_139_axis_0"), val = int32(0)]; bool concat_139_interleave_0 = const()[name = string("concat_139_interleave_0"), val = bool(false)]; tensor concat_139 = concat(axis = concat_139_axis_0, interleave = concat_139_interleave_0, values = (expand_dims_208, concat_139_values1_0, var_1725, concat_139_values3_0))[name = string("concat_139")]; tensor model_model_kv_cache_0_internal_tensor_assign_35_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_35_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_35_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_35_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_35_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_35_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_35_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_35_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_35_cast_fp16 = slice_update(begin = concat_138, begin_mask = model_model_kv_cache_0_internal_tensor_assign_35_begin_mask_0, end = concat_139, end_mask = model_model_kv_cache_0_internal_tensor_assign_35_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_35_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_35_stride_0, update = key_states_69, x = coreml_update_state_89)[name = string("model_model_kv_cache_0_internal_tensor_assign_35_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_35_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_34_write_state")]; tensor coreml_update_state_90 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_34")]; tensor expand_dims_210 = const()[name = string("expand_dims_210"), val = tensor([45])]; tensor expand_dims_211 = const()[name = string("expand_dims_211"), val = tensor([0])]; tensor expand_dims_213 = const()[name = string("expand_dims_213"), val = tensor([0])]; tensor expand_dims_214 = const()[name = string("expand_dims_214"), val = tensor([46])]; int32 concat_142_axis_0 = const()[name = string("concat_142_axis_0"), val = int32(0)]; bool concat_142_interleave_0 = const()[name = string("concat_142_interleave_0"), val = bool(false)]; tensor concat_142 = concat(axis = concat_142_axis_0, interleave = concat_142_interleave_0, values = (expand_dims_210, expand_dims_211, current_pos, expand_dims_213))[name = string("concat_142")]; tensor concat_143_values1_0 = const()[name = string("concat_143_values1_0"), val = tensor([0])]; tensor concat_143_values3_0 = const()[name = string("concat_143_values3_0"), val = tensor([0])]; int32 concat_143_axis_0 = const()[name = string("concat_143_axis_0"), val = int32(0)]; bool concat_143_interleave_0 = const()[name = string("concat_143_interleave_0"), val = bool(false)]; tensor concat_143 = concat(axis = concat_143_axis_0, interleave = concat_143_interleave_0, values = (expand_dims_214, concat_143_values1_0, var_1725, concat_143_values3_0))[name = string("concat_143")]; tensor model_model_kv_cache_0_internal_tensor_assign_36_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_36_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_36_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_36_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_36_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_36_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_36_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_36_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_36_cast_fp16 = slice_update(begin = concat_142, begin_mask = model_model_kv_cache_0_internal_tensor_assign_36_begin_mask_0, end = concat_143, end_mask = model_model_kv_cache_0_internal_tensor_assign_36_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_36_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_36_stride_0, update = var_9581, x = coreml_update_state_90)[name = string("model_model_kv_cache_0_internal_tensor_assign_36_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_36_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_35_write_state")]; tensor coreml_update_state_91 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_35")]; tensor var_9731_begin_0 = const()[name = string("op_9731_begin_0"), val = tensor([17, 0, 0, 0])]; tensor var_9731_end_0 = const()[name = string("op_9731_end_0"), val = tensor([18, 8, 1024, 128])]; tensor var_9731_end_mask_0 = const()[name = string("op_9731_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_9731_cast_fp16 = slice_by_index(begin = var_9731_begin_0, end = var_9731_end_0, end_mask = var_9731_end_mask_0, x = coreml_update_state_91)[name = string("op_9731_cast_fp16")]; tensor K_layer_cache_35_axes_0 = const()[name = string("K_layer_cache_35_axes_0"), val = tensor([0])]; tensor K_layer_cache_35_cast_fp16 = squeeze(axes = K_layer_cache_35_axes_0, x = var_9731_cast_fp16)[name = string("K_layer_cache_35_cast_fp16")]; tensor var_9738_begin_0 = const()[name = string("op_9738_begin_0"), val = tensor([45, 0, 0, 0])]; tensor var_9738_end_0 = const()[name = string("op_9738_end_0"), val = tensor([46, 8, 1024, 128])]; tensor var_9738_end_mask_0 = const()[name = string("op_9738_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_9738_cast_fp16 = slice_by_index(begin = var_9738_begin_0, end = var_9738_end_0, end_mask = var_9738_end_mask_0, x = coreml_update_state_91)[name = string("op_9738_cast_fp16")]; tensor V_layer_cache_35_axes_0 = const()[name = string("V_layer_cache_35_axes_0"), val = tensor([0])]; tensor V_layer_cache_35_cast_fp16 = squeeze(axes = V_layer_cache_35_axes_0, x = var_9738_cast_fp16)[name = string("V_layer_cache_35_cast_fp16")]; tensor x_347_axes_0 = const()[name = string("x_347_axes_0"), val = tensor([1])]; tensor x_347_cast_fp16 = expand_dims(axes = x_347_axes_0, x = K_layer_cache_35_cast_fp16)[name = string("x_347_cast_fp16")]; tensor var_9775 = const()[name = string("op_9775"), val = tensor([1, 2, 1, 1])]; tensor x_349_cast_fp16 = tile(reps = var_9775, x = x_347_cast_fp16)[name = string("x_349_cast_fp16")]; tensor var_9787 = const()[name = string("op_9787"), val = tensor([1, -1, 1024, 128])]; tensor key_states_71_cast_fp16 = reshape(shape = var_9787, x = x_349_cast_fp16)[name = string("key_states_71_cast_fp16")]; tensor x_353_axes_0 = const()[name = string("x_353_axes_0"), val = tensor([1])]; tensor x_353_cast_fp16 = expand_dims(axes = x_353_axes_0, x = V_layer_cache_35_cast_fp16)[name = string("x_353_cast_fp16")]; tensor var_9795 = const()[name = string("op_9795"), val = tensor([1, 2, 1, 1])]; tensor x_355_cast_fp16 = tile(reps = var_9795, x = x_353_cast_fp16)[name = string("x_355_cast_fp16")]; tensor var_9807 = const()[name = string("op_9807"), val = tensor([1, -1, 1024, 128])]; tensor value_states_105_cast_fp16 = reshape(shape = var_9807, x = x_355_cast_fp16)[name = string("value_states_105_cast_fp16")]; bool var_9822_transpose_x_1 = const()[name = string("op_9822_transpose_x_1"), val = bool(false)]; bool var_9822_transpose_y_1 = const()[name = string("op_9822_transpose_y_1"), val = bool(true)]; tensor var_9822 = matmul(transpose_x = var_9822_transpose_x_1, transpose_y = var_9822_transpose_y_1, x = query_states_69, y = key_states_71_cast_fp16)[name = string("op_9822")]; fp16 var_9823_to_fp16 = const()[name = string("op_9823_to_fp16"), val = fp16(0x1.6ap-4)]; tensor attn_weights_103_cast_fp16 = mul(x = var_9822, y = var_9823_to_fp16)[name = string("attn_weights_103_cast_fp16")]; tensor attn_weights_105_cast_fp16 = add(x = attn_weights_103_cast_fp16, y = causal_mask)[name = string("attn_weights_105_cast_fp16")]; int32 var_9858 = const()[name = string("op_9858"), val = int32(-1)]; tensor attn_weights_107_cast_fp16 = softmax(axis = var_9858, x = attn_weights_105_cast_fp16)[name = string("attn_weights_107_cast_fp16")]; bool attn_output_171_transpose_x_0 = const()[name = string("attn_output_171_transpose_x_0"), val = bool(false)]; bool attn_output_171_transpose_y_0 = const()[name = string("attn_output_171_transpose_y_0"), val = bool(false)]; tensor attn_output_171_cast_fp16 = matmul(transpose_x = attn_output_171_transpose_x_0, transpose_y = attn_output_171_transpose_y_0, x = attn_weights_107_cast_fp16, y = value_states_105_cast_fp16)[name = string("attn_output_171_cast_fp16")]; tensor var_9869_perm_0 = const()[name = string("op_9869_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_9873 = const()[name = string("op_9873"), val = tensor([1, 1, 2048])]; tensor var_9869_cast_fp16 = transpose(perm = var_9869_perm_0, x = attn_output_171_cast_fp16)[name = string("transpose_64")]; tensor attn_output_175_cast_fp16 = reshape(shape = var_9873, x = var_9869_cast_fp16)[name = string("attn_output_175_cast_fp16")]; tensor var_9878 = const()[name = string("op_9878"), val = tensor([0, 2, 1])]; string var_9894_pad_type_0 = const()[name = string("op_9894_pad_type_0"), val = string("valid")]; int32 var_9894_groups_0 = const()[name = string("op_9894_groups_0"), val = int32(1)]; tensor var_9894_strides_0 = const()[name = string("op_9894_strides_0"), val = tensor([1])]; tensor var_9894_pad_0 = const()[name = string("op_9894_pad_0"), val = tensor([0, 0])]; tensor var_9894_dilations_0 = const()[name = string("op_9894_dilations_0"), val = tensor([1])]; tensor squeeze_17_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1064911616))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1068057408))))[name = string("squeeze_17_cast_fp16_to_fp32_to_fp16_palettized")]; tensor var_9879_cast_fp16 = transpose(perm = var_9878, x = attn_output_175_cast_fp16)[name = string("transpose_63")]; tensor var_9894_cast_fp16 = conv(dilations = var_9894_dilations_0, groups = var_9894_groups_0, pad = var_9894_pad_0, pad_type = var_9894_pad_type_0, strides = var_9894_strides_0, weight = squeeze_17_cast_fp16_to_fp32_to_fp16_palettized, x = var_9879_cast_fp16)[name = string("op_9894_cast_fp16")]; tensor var_9898 = const()[name = string("op_9898"), val = tensor([0, 2, 1])]; tensor attn_output_179_cast_fp16 = transpose(perm = var_9898, x = var_9894_cast_fp16)[name = string("transpose_62")]; tensor hidden_states_107_cast_fp16 = add(x = hidden_states_103_cast_fp16, y = attn_output_179_cast_fp16)[name = string("hidden_states_107_cast_fp16")]; tensor mean_143_axes_0 = const()[name = string("mean_143_axes_0"), val = tensor([-1])]; bool mean_143_keep_dims_0 = const()[name = string("mean_143_keep_dims_0"), val = bool(true)]; tensor mean_143_cast_fp16 = reduce_mean(axes = mean_143_axes_0, keep_dims = mean_143_keep_dims_0, x = hidden_states_107_cast_fp16)[name = string("mean_143_cast_fp16")]; tensor input_317_cast_fp16 = sub(x = hidden_states_107_cast_fp16, y = mean_143_cast_fp16)[name = string("input_317_cast_fp16")]; tensor var_9917_axes_0 = const()[name = string("op_9917_axes_0"), val = tensor([-1])]; tensor model_model_layers_17_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_17_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1068090240)))]; fp16 var_9905_to_fp16 = const()[name = string("op_9905_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_9917_cast_fp16 = layer_norm(axes = var_9917_axes_0, epsilon = var_9905_to_fp16, gamma = model_model_layers_17_post_attention_layernorm_weight_to_fp16, x = input_317_cast_fp16)[name = string("op_9917_cast_fp16")]; tensor var_9931 = const()[name = string("op_9931"), val = tensor([0, 2, 1])]; tensor input_319_axes_0 = const()[name = string("input_319_axes_0"), val = tensor([2])]; tensor var_9932 = transpose(perm = var_9931, x = var_9917_cast_fp16)[name = string("transpose_61")]; tensor input_319 = expand_dims(axes = input_319_axes_0, x = var_9932)[name = string("input_319")]; string input_321_pad_type_0 = const()[name = string("input_321_pad_type_0"), val = string("valid")]; tensor input_321_strides_0 = const()[name = string("input_321_strides_0"), val = tensor([1, 1])]; tensor input_321_pad_0 = const()[name = string("input_321_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_321_dilations_0 = const()[name = string("input_321_dilations_0"), val = tensor([1, 1])]; int32 input_321_groups_0 = const()[name = string("input_321_groups_0"), val = int32(1)]; tensor input_321 = conv(dilations = input_321_dilations_0, groups = input_321_groups_0, pad = input_321_pad_0, pad_type = input_321_pad_type_0, strides = input_321_strides_0, weight = model_model_layers_17_mlp_gate_proj_weight_palettized, x = input_319)[name = string("input_321")]; string b_35_pad_type_0 = const()[name = string("b_35_pad_type_0"), val = string("valid")]; tensor b_35_strides_0 = const()[name = string("b_35_strides_0"), val = tensor([1, 1])]; tensor b_35_pad_0 = const()[name = string("b_35_pad_0"), val = tensor([0, 0, 0, 0])]; tensor b_35_dilations_0 = const()[name = string("b_35_dilations_0"), val = tensor([1, 1])]; int32 b_35_groups_0 = const()[name = string("b_35_groups_0"), val = int32(1)]; tensor b_35 = conv(dilations = b_35_dilations_0, groups = b_35_groups_0, pad = b_35_pad_0, pad_type = b_35_pad_type_0, strides = b_35_strides_0, weight = model_model_layers_17_mlp_up_proj_weight_palettized, x = input_319)[name = string("b_35")]; tensor c_35 = silu(x = input_321)[name = string("c_35")]; tensor input_323 = mul(x = c_35, y = b_35)[name = string("input_323")]; string e_35_pad_type_0 = const()[name = string("e_35_pad_type_0"), val = string("valid")]; tensor e_35_strides_0 = const()[name = string("e_35_strides_0"), val = tensor([1, 1])]; tensor e_35_pad_0 = const()[name = string("e_35_pad_0"), val = tensor([0, 0, 0, 0])]; tensor e_35_dilations_0 = const()[name = string("e_35_dilations_0"), val = tensor([1, 1])]; int32 e_35_groups_0 = const()[name = string("e_35_groups_0"), val = int32(1)]; tensor e_35 = conv(dilations = e_35_dilations_0, groups = e_35_groups_0, pad = e_35_pad_0, pad_type = e_35_pad_type_0, strides = e_35_strides_0, weight = model_model_layers_17_mlp_down_proj_weight_palettized, x = input_323)[name = string("e_35")]; tensor var_9954_axes_0 = const()[name = string("op_9954_axes_0"), val = tensor([2])]; tensor var_9954 = squeeze(axes = var_9954_axes_0, x = e_35)[name = string("op_9954")]; tensor var_9955 = const()[name = string("op_9955"), val = tensor([0, 2, 1])]; tensor var_9956 = transpose(perm = var_9955, x = var_9954)[name = string("transpose_60")]; tensor hidden_states_109_cast_fp16 = add(x = hidden_states_107_cast_fp16, y = var_9956)[name = string("hidden_states_109_cast_fp16")]; tensor mean_145_axes_0 = const()[name = string("mean_145_axes_0"), val = tensor([-1])]; bool mean_145_keep_dims_0 = const()[name = string("mean_145_keep_dims_0"), val = bool(true)]; tensor mean_145_cast_fp16 = reduce_mean(axes = mean_145_axes_0, keep_dims = mean_145_keep_dims_0, x = hidden_states_109_cast_fp16)[name = string("mean_145_cast_fp16")]; tensor input_325_cast_fp16 = sub(x = hidden_states_109_cast_fp16, y = mean_145_cast_fp16)[name = string("input_325_cast_fp16")]; tensor var_9974_axes_0 = const()[name = string("op_9974_axes_0"), val = tensor([-1])]; tensor model_model_layers_18_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_18_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1068094400)))]; fp16 var_9962_to_fp16 = const()[name = string("op_9962_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_9974_cast_fp16 = layer_norm(axes = var_9974_axes_0, epsilon = var_9962_to_fp16, gamma = model_model_layers_18_input_layernorm_weight_to_fp16, x = input_325_cast_fp16)[name = string("op_9974_cast_fp16")]; tensor var_9980 = const()[name = string("op_9980"), val = tensor([0, 2, 1])]; tensor var_9983_axes_0 = const()[name = string("op_9983_axes_0"), val = tensor([2])]; tensor var_9981 = transpose(perm = var_9980, x = var_9974_cast_fp16)[name = string("transpose_59")]; tensor var_9983 = expand_dims(axes = var_9983_axes_0, x = var_9981)[name = string("op_9983")]; string var_9999_pad_type_0 = const()[name = string("op_9999_pad_type_0"), val = string("valid")]; tensor var_9999_strides_0 = const()[name = string("op_9999_strides_0"), val = tensor([1, 1])]; tensor var_9999_pad_0 = const()[name = string("op_9999_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_9999_dilations_0 = const()[name = string("op_9999_dilations_0"), val = tensor([1, 1])]; int32 var_9999_groups_0 = const()[name = string("op_9999_groups_0"), val = int32(1)]; tensor var_9999 = conv(dilations = var_9999_dilations_0, groups = var_9999_groups_0, pad = var_9999_pad_0, pad_type = var_9999_pad_type_0, strides = var_9999_strides_0, weight = model_model_layers_18_self_attn_q_proj_weight_palettized, x = var_9983)[name = string("op_9999")]; tensor var_10004 = const()[name = string("op_10004"), val = tensor([1, 16, 1, 128])]; tensor var_10005 = reshape(shape = var_10004, x = var_9999)[name = string("op_10005")]; string var_10021_pad_type_0 = const()[name = string("op_10021_pad_type_0"), val = string("valid")]; tensor var_10021_strides_0 = const()[name = string("op_10021_strides_0"), val = tensor([1, 1])]; tensor var_10021_pad_0 = const()[name = string("op_10021_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_10021_dilations_0 = const()[name = string("op_10021_dilations_0"), val = tensor([1, 1])]; int32 var_10021_groups_0 = const()[name = string("op_10021_groups_0"), val = int32(1)]; tensor var_10021 = conv(dilations = var_10021_dilations_0, groups = var_10021_groups_0, pad = var_10021_pad_0, pad_type = var_10021_pad_type_0, strides = var_10021_strides_0, weight = model_model_layers_18_self_attn_k_proj_weight_palettized, x = var_9983)[name = string("op_10021")]; tensor var_10026 = const()[name = string("op_10026"), val = tensor([1, 8, 1, 128])]; tensor var_10027 = reshape(shape = var_10026, x = var_10021)[name = string("op_10027")]; string var_10043_pad_type_0 = const()[name = string("op_10043_pad_type_0"), val = string("valid")]; tensor var_10043_strides_0 = const()[name = string("op_10043_strides_0"), val = tensor([1, 1])]; tensor var_10043_pad_0 = const()[name = string("op_10043_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_10043_dilations_0 = const()[name = string("op_10043_dilations_0"), val = tensor([1, 1])]; int32 var_10043_groups_0 = const()[name = string("op_10043_groups_0"), val = int32(1)]; tensor var_10043 = conv(dilations = var_10043_dilations_0, groups = var_10043_groups_0, pad = var_10043_pad_0, pad_type = var_10043_pad_type_0, strides = var_10043_strides_0, weight = model_model_layers_18_self_attn_v_proj_weight_palettized, x = var_9983)[name = string("op_10043")]; tensor var_10048 = const()[name = string("op_10048"), val = tensor([1, 8, 1, 128])]; tensor var_10049 = reshape(shape = var_10048, x = var_10043)[name = string("op_10049")]; tensor mean_147_axes_0 = const()[name = string("mean_147_axes_0"), val = tensor([-1])]; bool mean_147_keep_dims_0 = const()[name = string("mean_147_keep_dims_0"), val = bool(true)]; tensor mean_147 = reduce_mean(axes = mean_147_axes_0, keep_dims = mean_147_keep_dims_0, x = var_10005)[name = string("mean_147")]; tensor input_329 = sub(x = var_10005, y = mean_147)[name = string("input_329")]; tensor var_10070_axes_0 = const()[name = string("op_10070_axes_0"), val = tensor([-1])]; tensor model_model_layers_18_self_attn_q_norm_weight_to_fp16 = const()[name = string("model_model_layers_18_self_attn_q_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1068098560)))]; fp16 var_10058_to_fp16 = const()[name = string("op_10058_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_10070_cast_fp16 = layer_norm(axes = var_10070_axes_0, epsilon = var_10058_to_fp16, gamma = model_model_layers_18_self_attn_q_norm_weight_to_fp16, x = input_329)[name = string("op_10070_cast_fp16")]; tensor mean_149_axes_0 = const()[name = string("mean_149_axes_0"), val = tensor([-1])]; bool mean_149_keep_dims_0 = const()[name = string("mean_149_keep_dims_0"), val = bool(true)]; tensor mean_149 = reduce_mean(axes = mean_149_axes_0, keep_dims = mean_149_keep_dims_0, x = var_10027)[name = string("mean_149")]; tensor input_331 = sub(x = var_10027, y = mean_149)[name = string("input_331")]; tensor var_10088_axes_0 = const()[name = string("op_10088_axes_0"), val = tensor([-1])]; tensor model_model_layers_18_self_attn_k_norm_weight_to_fp16 = const()[name = string("model_model_layers_18_self_attn_k_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1068098880)))]; fp16 var_10076_to_fp16 = const()[name = string("op_10076_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_10088_cast_fp16 = layer_norm(axes = var_10088_axes_0, epsilon = var_10076_to_fp16, gamma = model_model_layers_18_self_attn_k_norm_weight_to_fp16, x = input_331)[name = string("op_10088_cast_fp16")]; tensor var_10091 = mul(x = var_10070_cast_fp16, y = cos_1_cast_fp16)[name = string("op_10091")]; tensor x1_73_begin_0 = const()[name = string("x1_73_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_73_end_0 = const()[name = string("x1_73_end_0"), val = tensor([1, 16, 1, 64])]; tensor x1_73_end_mask_0 = const()[name = string("x1_73_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_73 = slice_by_index(begin = x1_73_begin_0, end = x1_73_end_0, end_mask = x1_73_end_mask_0, x = var_10070_cast_fp16)[name = string("x1_73")]; tensor x2_73_begin_0 = const()[name = string("x2_73_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_73_end_0 = const()[name = string("x2_73_end_0"), val = tensor([1, 16, 1, 128])]; tensor x2_73_end_mask_0 = const()[name = string("x2_73_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_73 = slice_by_index(begin = x2_73_begin_0, end = x2_73_end_0, end_mask = x2_73_end_mask_0, x = var_10070_cast_fp16)[name = string("x2_73")]; fp16 const_329_promoted = const()[name = string("const_329_promoted"), val = fp16(-0x1p+0)]; tensor var_10112 = mul(x = x2_73, y = const_329_promoted)[name = string("op_10112")]; int32 var_10114 = const()[name = string("op_10114"), val = int32(-1)]; bool var_10115_interleave_0 = const()[name = string("op_10115_interleave_0"), val = bool(false)]; tensor var_10115 = concat(axis = var_10114, interleave = var_10115_interleave_0, values = (var_10112, x1_73))[name = string("op_10115")]; tensor var_10116 = mul(x = var_10115, y = sin_1_cast_fp16)[name = string("op_10116")]; tensor query_states_73 = add(x = var_10091, y = var_10116)[name = string("query_states_73")]; tensor var_10119 = mul(x = var_10088_cast_fp16, y = cos_1_cast_fp16)[name = string("op_10119")]; tensor x1_75_begin_0 = const()[name = string("x1_75_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_75_end_0 = const()[name = string("x1_75_end_0"), val = tensor([1, 8, 1, 64])]; tensor x1_75_end_mask_0 = const()[name = string("x1_75_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_75 = slice_by_index(begin = x1_75_begin_0, end = x1_75_end_0, end_mask = x1_75_end_mask_0, x = var_10088_cast_fp16)[name = string("x1_75")]; tensor x2_75_begin_0 = const()[name = string("x2_75_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_75_end_0 = const()[name = string("x2_75_end_0"), val = tensor([1, 8, 1, 128])]; tensor x2_75_end_mask_0 = const()[name = string("x2_75_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_75 = slice_by_index(begin = x2_75_begin_0, end = x2_75_end_0, end_mask = x2_75_end_mask_0, x = var_10088_cast_fp16)[name = string("x2_75")]; fp16 const_332_promoted = const()[name = string("const_332_promoted"), val = fp16(-0x1p+0)]; tensor var_10140 = mul(x = x2_75, y = const_332_promoted)[name = string("op_10140")]; int32 var_10142 = const()[name = string("op_10142"), val = int32(-1)]; bool var_10143_interleave_0 = const()[name = string("op_10143_interleave_0"), val = bool(false)]; tensor var_10143 = concat(axis = var_10142, interleave = var_10143_interleave_0, values = (var_10140, x1_75))[name = string("op_10143")]; tensor var_10144 = mul(x = var_10143, y = sin_1_cast_fp16)[name = string("op_10144")]; tensor key_states_73 = add(x = var_10119, y = var_10144)[name = string("key_states_73")]; tensor expand_dims_216 = const()[name = string("expand_dims_216"), val = tensor([18])]; tensor expand_dims_217 = const()[name = string("expand_dims_217"), val = tensor([0])]; tensor expand_dims_219 = const()[name = string("expand_dims_219"), val = tensor([0])]; tensor expand_dims_220 = const()[name = string("expand_dims_220"), val = tensor([19])]; int32 concat_146_axis_0 = const()[name = string("concat_146_axis_0"), val = int32(0)]; bool concat_146_interleave_0 = const()[name = string("concat_146_interleave_0"), val = bool(false)]; tensor concat_146 = concat(axis = concat_146_axis_0, interleave = concat_146_interleave_0, values = (expand_dims_216, expand_dims_217, current_pos, expand_dims_219))[name = string("concat_146")]; tensor concat_147_values1_0 = const()[name = string("concat_147_values1_0"), val = tensor([0])]; tensor concat_147_values3_0 = const()[name = string("concat_147_values3_0"), val = tensor([0])]; int32 concat_147_axis_0 = const()[name = string("concat_147_axis_0"), val = int32(0)]; bool concat_147_interleave_0 = const()[name = string("concat_147_interleave_0"), val = bool(false)]; tensor concat_147 = concat(axis = concat_147_axis_0, interleave = concat_147_interleave_0, values = (expand_dims_220, concat_147_values1_0, var_1725, concat_147_values3_0))[name = string("concat_147")]; tensor model_model_kv_cache_0_internal_tensor_assign_37_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_37_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_37_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_37_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_37_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_37_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_37_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_37_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_37_cast_fp16 = slice_update(begin = concat_146, begin_mask = model_model_kv_cache_0_internal_tensor_assign_37_begin_mask_0, end = concat_147, end_mask = model_model_kv_cache_0_internal_tensor_assign_37_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_37_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_37_stride_0, update = key_states_73, x = coreml_update_state_91)[name = string("model_model_kv_cache_0_internal_tensor_assign_37_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_37_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_36_write_state")]; tensor coreml_update_state_92 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_36")]; tensor expand_dims_222 = const()[name = string("expand_dims_222"), val = tensor([46])]; tensor expand_dims_223 = const()[name = string("expand_dims_223"), val = tensor([0])]; tensor expand_dims_225 = const()[name = string("expand_dims_225"), val = tensor([0])]; tensor expand_dims_226 = const()[name = string("expand_dims_226"), val = tensor([47])]; int32 concat_150_axis_0 = const()[name = string("concat_150_axis_0"), val = int32(0)]; bool concat_150_interleave_0 = const()[name = string("concat_150_interleave_0"), val = bool(false)]; tensor concat_150 = concat(axis = concat_150_axis_0, interleave = concat_150_interleave_0, values = (expand_dims_222, expand_dims_223, current_pos, expand_dims_225))[name = string("concat_150")]; tensor concat_151_values1_0 = const()[name = string("concat_151_values1_0"), val = tensor([0])]; tensor concat_151_values3_0 = const()[name = string("concat_151_values3_0"), val = tensor([0])]; int32 concat_151_axis_0 = const()[name = string("concat_151_axis_0"), val = int32(0)]; bool concat_151_interleave_0 = const()[name = string("concat_151_interleave_0"), val = bool(false)]; tensor concat_151 = concat(axis = concat_151_axis_0, interleave = concat_151_interleave_0, values = (expand_dims_226, concat_151_values1_0, var_1725, concat_151_values3_0))[name = string("concat_151")]; tensor model_model_kv_cache_0_internal_tensor_assign_38_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_38_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_38_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_38_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_38_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_38_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_38_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_38_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_38_cast_fp16 = slice_update(begin = concat_150, begin_mask = model_model_kv_cache_0_internal_tensor_assign_38_begin_mask_0, end = concat_151, end_mask = model_model_kv_cache_0_internal_tensor_assign_38_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_38_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_38_stride_0, update = var_10049, x = coreml_update_state_92)[name = string("model_model_kv_cache_0_internal_tensor_assign_38_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_38_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_37_write_state")]; tensor coreml_update_state_93 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_37")]; tensor var_10199_begin_0 = const()[name = string("op_10199_begin_0"), val = tensor([18, 0, 0, 0])]; tensor var_10199_end_0 = const()[name = string("op_10199_end_0"), val = tensor([19, 8, 1024, 128])]; tensor var_10199_end_mask_0 = const()[name = string("op_10199_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_10199_cast_fp16 = slice_by_index(begin = var_10199_begin_0, end = var_10199_end_0, end_mask = var_10199_end_mask_0, x = coreml_update_state_93)[name = string("op_10199_cast_fp16")]; tensor K_layer_cache_37_axes_0 = const()[name = string("K_layer_cache_37_axes_0"), val = tensor([0])]; tensor K_layer_cache_37_cast_fp16 = squeeze(axes = K_layer_cache_37_axes_0, x = var_10199_cast_fp16)[name = string("K_layer_cache_37_cast_fp16")]; tensor var_10206_begin_0 = const()[name = string("op_10206_begin_0"), val = tensor([46, 0, 0, 0])]; tensor var_10206_end_0 = const()[name = string("op_10206_end_0"), val = tensor([47, 8, 1024, 128])]; tensor var_10206_end_mask_0 = const()[name = string("op_10206_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_10206_cast_fp16 = slice_by_index(begin = var_10206_begin_0, end = var_10206_end_0, end_mask = var_10206_end_mask_0, x = coreml_update_state_93)[name = string("op_10206_cast_fp16")]; tensor V_layer_cache_37_axes_0 = const()[name = string("V_layer_cache_37_axes_0"), val = tensor([0])]; tensor V_layer_cache_37_cast_fp16 = squeeze(axes = V_layer_cache_37_axes_0, x = var_10206_cast_fp16)[name = string("V_layer_cache_37_cast_fp16")]; tensor x_367_axes_0 = const()[name = string("x_367_axes_0"), val = tensor([1])]; tensor x_367_cast_fp16 = expand_dims(axes = x_367_axes_0, x = K_layer_cache_37_cast_fp16)[name = string("x_367_cast_fp16")]; tensor var_10243 = const()[name = string("op_10243"), val = tensor([1, 2, 1, 1])]; tensor x_369_cast_fp16 = tile(reps = var_10243, x = x_367_cast_fp16)[name = string("x_369_cast_fp16")]; tensor var_10255 = const()[name = string("op_10255"), val = tensor([1, -1, 1024, 128])]; tensor key_states_75_cast_fp16 = reshape(shape = var_10255, x = x_369_cast_fp16)[name = string("key_states_75_cast_fp16")]; tensor x_373_axes_0 = const()[name = string("x_373_axes_0"), val = tensor([1])]; tensor x_373_cast_fp16 = expand_dims(axes = x_373_axes_0, x = V_layer_cache_37_cast_fp16)[name = string("x_373_cast_fp16")]; tensor var_10263 = const()[name = string("op_10263"), val = tensor([1, 2, 1, 1])]; tensor x_375_cast_fp16 = tile(reps = var_10263, x = x_373_cast_fp16)[name = string("x_375_cast_fp16")]; tensor var_10275 = const()[name = string("op_10275"), val = tensor([1, -1, 1024, 128])]; tensor value_states_111_cast_fp16 = reshape(shape = var_10275, x = x_375_cast_fp16)[name = string("value_states_111_cast_fp16")]; bool var_10290_transpose_x_1 = const()[name = string("op_10290_transpose_x_1"), val = bool(false)]; bool var_10290_transpose_y_1 = const()[name = string("op_10290_transpose_y_1"), val = bool(true)]; tensor var_10290 = matmul(transpose_x = var_10290_transpose_x_1, transpose_y = var_10290_transpose_y_1, x = query_states_73, y = key_states_75_cast_fp16)[name = string("op_10290")]; fp16 var_10291_to_fp16 = const()[name = string("op_10291_to_fp16"), val = fp16(0x1.6ap-4)]; tensor attn_weights_109_cast_fp16 = mul(x = var_10290, y = var_10291_to_fp16)[name = string("attn_weights_109_cast_fp16")]; tensor attn_weights_111_cast_fp16 = add(x = attn_weights_109_cast_fp16, y = causal_mask)[name = string("attn_weights_111_cast_fp16")]; int32 var_10326 = const()[name = string("op_10326"), val = int32(-1)]; tensor attn_weights_113_cast_fp16 = softmax(axis = var_10326, x = attn_weights_111_cast_fp16)[name = string("attn_weights_113_cast_fp16")]; bool attn_output_181_transpose_x_0 = const()[name = string("attn_output_181_transpose_x_0"), val = bool(false)]; bool attn_output_181_transpose_y_0 = const()[name = string("attn_output_181_transpose_y_0"), val = bool(false)]; tensor attn_output_181_cast_fp16 = matmul(transpose_x = attn_output_181_transpose_x_0, transpose_y = attn_output_181_transpose_y_0, x = attn_weights_113_cast_fp16, y = value_states_111_cast_fp16)[name = string("attn_output_181_cast_fp16")]; tensor var_10337_perm_0 = const()[name = string("op_10337_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_10341 = const()[name = string("op_10341"), val = tensor([1, 1, 2048])]; tensor var_10337_cast_fp16 = transpose(perm = var_10337_perm_0, x = attn_output_181_cast_fp16)[name = string("transpose_58")]; tensor attn_output_185_cast_fp16 = reshape(shape = var_10341, x = var_10337_cast_fp16)[name = string("attn_output_185_cast_fp16")]; tensor var_10346 = const()[name = string("op_10346"), val = tensor([0, 2, 1])]; string var_10362_pad_type_0 = const()[name = string("op_10362_pad_type_0"), val = string("valid")]; int32 var_10362_groups_0 = const()[name = string("op_10362_groups_0"), val = int32(1)]; tensor var_10362_strides_0 = const()[name = string("op_10362_strides_0"), val = tensor([1])]; tensor var_10362_pad_0 = const()[name = string("op_10362_pad_0"), val = tensor([0, 0])]; tensor var_10362_dilations_0 = const()[name = string("op_10362_dilations_0"), val = tensor([1])]; tensor squeeze_18_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1068099200))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1071244992))))[name = string("squeeze_18_cast_fp16_to_fp32_to_fp16_palettized")]; tensor var_10347_cast_fp16 = transpose(perm = var_10346, x = attn_output_185_cast_fp16)[name = string("transpose_57")]; tensor var_10362_cast_fp16 = conv(dilations = var_10362_dilations_0, groups = var_10362_groups_0, pad = var_10362_pad_0, pad_type = var_10362_pad_type_0, strides = var_10362_strides_0, weight = squeeze_18_cast_fp16_to_fp32_to_fp16_palettized, x = var_10347_cast_fp16)[name = string("op_10362_cast_fp16")]; tensor var_10366 = const()[name = string("op_10366"), val = tensor([0, 2, 1])]; tensor attn_output_189_cast_fp16 = transpose(perm = var_10366, x = var_10362_cast_fp16)[name = string("transpose_56")]; tensor hidden_states_113_cast_fp16 = add(x = hidden_states_109_cast_fp16, y = attn_output_189_cast_fp16)[name = string("hidden_states_113_cast_fp16")]; tensor mean_151_axes_0 = const()[name = string("mean_151_axes_0"), val = tensor([-1])]; bool mean_151_keep_dims_0 = const()[name = string("mean_151_keep_dims_0"), val = bool(true)]; tensor mean_151_cast_fp16 = reduce_mean(axes = mean_151_axes_0, keep_dims = mean_151_keep_dims_0, x = hidden_states_113_cast_fp16)[name = string("mean_151_cast_fp16")]; tensor input_335_cast_fp16 = sub(x = hidden_states_113_cast_fp16, y = mean_151_cast_fp16)[name = string("input_335_cast_fp16")]; tensor var_10385_axes_0 = const()[name = string("op_10385_axes_0"), val = tensor([-1])]; tensor model_model_layers_18_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_18_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1071277824)))]; fp16 var_10373_to_fp16 = const()[name = string("op_10373_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_10385_cast_fp16 = layer_norm(axes = var_10385_axes_0, epsilon = var_10373_to_fp16, gamma = model_model_layers_18_post_attention_layernorm_weight_to_fp16, x = input_335_cast_fp16)[name = string("op_10385_cast_fp16")]; tensor var_10399 = const()[name = string("op_10399"), val = tensor([0, 2, 1])]; tensor input_337_axes_0 = const()[name = string("input_337_axes_0"), val = tensor([2])]; tensor var_10400 = transpose(perm = var_10399, x = var_10385_cast_fp16)[name = string("transpose_55")]; tensor input_337 = expand_dims(axes = input_337_axes_0, x = var_10400)[name = string("input_337")]; string input_339_pad_type_0 = const()[name = string("input_339_pad_type_0"), val = string("valid")]; tensor input_339_strides_0 = const()[name = string("input_339_strides_0"), val = tensor([1, 1])]; tensor input_339_pad_0 = const()[name = string("input_339_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_339_dilations_0 = const()[name = string("input_339_dilations_0"), val = tensor([1, 1])]; int32 input_339_groups_0 = const()[name = string("input_339_groups_0"), val = int32(1)]; tensor input_339 = conv(dilations = input_339_dilations_0, groups = input_339_groups_0, pad = input_339_pad_0, pad_type = input_339_pad_type_0, strides = input_339_strides_0, weight = model_model_layers_18_mlp_gate_proj_weight_palettized, x = input_337)[name = string("input_339")]; string b_37_pad_type_0 = const()[name = string("b_37_pad_type_0"), val = string("valid")]; tensor b_37_strides_0 = const()[name = string("b_37_strides_0"), val = tensor([1, 1])]; tensor b_37_pad_0 = const()[name = string("b_37_pad_0"), val = tensor([0, 0, 0, 0])]; tensor b_37_dilations_0 = const()[name = string("b_37_dilations_0"), val = tensor([1, 1])]; int32 b_37_groups_0 = const()[name = string("b_37_groups_0"), val = int32(1)]; tensor b_37 = conv(dilations = b_37_dilations_0, groups = b_37_groups_0, pad = b_37_pad_0, pad_type = b_37_pad_type_0, strides = b_37_strides_0, weight = model_model_layers_18_mlp_up_proj_weight_palettized, x = input_337)[name = string("b_37")]; tensor c_37 = silu(x = input_339)[name = string("c_37")]; tensor input_341 = mul(x = c_37, y = b_37)[name = string("input_341")]; string e_37_pad_type_0 = const()[name = string("e_37_pad_type_0"), val = string("valid")]; tensor e_37_strides_0 = const()[name = string("e_37_strides_0"), val = tensor([1, 1])]; tensor e_37_pad_0 = const()[name = string("e_37_pad_0"), val = tensor([0, 0, 0, 0])]; tensor e_37_dilations_0 = const()[name = string("e_37_dilations_0"), val = tensor([1, 1])]; int32 e_37_groups_0 = const()[name = string("e_37_groups_0"), val = int32(1)]; tensor e_37 = conv(dilations = e_37_dilations_0, groups = e_37_groups_0, pad = e_37_pad_0, pad_type = e_37_pad_type_0, strides = e_37_strides_0, weight = model_model_layers_18_mlp_down_proj_weight_palettized, x = input_341)[name = string("e_37")]; tensor var_10422_axes_0 = const()[name = string("op_10422_axes_0"), val = tensor([2])]; tensor var_10422 = squeeze(axes = var_10422_axes_0, x = e_37)[name = string("op_10422")]; tensor var_10423 = const()[name = string("op_10423"), val = tensor([0, 2, 1])]; tensor var_10424 = transpose(perm = var_10423, x = var_10422)[name = string("transpose_54")]; tensor hidden_states_115_cast_fp16 = add(x = hidden_states_113_cast_fp16, y = var_10424)[name = string("hidden_states_115_cast_fp16")]; tensor mean_153_axes_0 = const()[name = string("mean_153_axes_0"), val = tensor([-1])]; bool mean_153_keep_dims_0 = const()[name = string("mean_153_keep_dims_0"), val = bool(true)]; tensor mean_153_cast_fp16 = reduce_mean(axes = mean_153_axes_0, keep_dims = mean_153_keep_dims_0, x = hidden_states_115_cast_fp16)[name = string("mean_153_cast_fp16")]; tensor input_343_cast_fp16 = sub(x = hidden_states_115_cast_fp16, y = mean_153_cast_fp16)[name = string("input_343_cast_fp16")]; tensor var_10442_axes_0 = const()[name = string("op_10442_axes_0"), val = tensor([-1])]; tensor model_model_layers_19_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_19_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1071281984)))]; fp16 var_10430_to_fp16 = const()[name = string("op_10430_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_10442_cast_fp16 = layer_norm(axes = var_10442_axes_0, epsilon = var_10430_to_fp16, gamma = model_model_layers_19_input_layernorm_weight_to_fp16, x = input_343_cast_fp16)[name = string("op_10442_cast_fp16")]; tensor var_10448 = const()[name = string("op_10448"), val = tensor([0, 2, 1])]; tensor var_10451_axes_0 = const()[name = string("op_10451_axes_0"), val = tensor([2])]; tensor var_10449 = transpose(perm = var_10448, x = var_10442_cast_fp16)[name = string("transpose_53")]; tensor var_10451 = expand_dims(axes = var_10451_axes_0, x = var_10449)[name = string("op_10451")]; string var_10467_pad_type_0 = const()[name = string("op_10467_pad_type_0"), val = string("valid")]; tensor var_10467_strides_0 = const()[name = string("op_10467_strides_0"), val = tensor([1, 1])]; tensor var_10467_pad_0 = const()[name = string("op_10467_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_10467_dilations_0 = const()[name = string("op_10467_dilations_0"), val = tensor([1, 1])]; int32 var_10467_groups_0 = const()[name = string("op_10467_groups_0"), val = int32(1)]; tensor var_10467 = conv(dilations = var_10467_dilations_0, groups = var_10467_groups_0, pad = var_10467_pad_0, pad_type = var_10467_pad_type_0, strides = var_10467_strides_0, weight = model_model_layers_19_self_attn_q_proj_weight_palettized, x = var_10451)[name = string("op_10467")]; tensor var_10472 = const()[name = string("op_10472"), val = tensor([1, 16, 1, 128])]; tensor var_10473 = reshape(shape = var_10472, x = var_10467)[name = string("op_10473")]; string var_10489_pad_type_0 = const()[name = string("op_10489_pad_type_0"), val = string("valid")]; tensor var_10489_strides_0 = const()[name = string("op_10489_strides_0"), val = tensor([1, 1])]; tensor var_10489_pad_0 = const()[name = string("op_10489_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_10489_dilations_0 = const()[name = string("op_10489_dilations_0"), val = tensor([1, 1])]; int32 var_10489_groups_0 = const()[name = string("op_10489_groups_0"), val = int32(1)]; tensor var_10489 = conv(dilations = var_10489_dilations_0, groups = var_10489_groups_0, pad = var_10489_pad_0, pad_type = var_10489_pad_type_0, strides = var_10489_strides_0, weight = model_model_layers_19_self_attn_k_proj_weight_palettized, x = var_10451)[name = string("op_10489")]; tensor var_10494 = const()[name = string("op_10494"), val = tensor([1, 8, 1, 128])]; tensor var_10495 = reshape(shape = var_10494, x = var_10489)[name = string("op_10495")]; string var_10511_pad_type_0 = const()[name = string("op_10511_pad_type_0"), val = string("valid")]; tensor var_10511_strides_0 = const()[name = string("op_10511_strides_0"), val = tensor([1, 1])]; tensor var_10511_pad_0 = const()[name = string("op_10511_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_10511_dilations_0 = const()[name = string("op_10511_dilations_0"), val = tensor([1, 1])]; int32 var_10511_groups_0 = const()[name = string("op_10511_groups_0"), val = int32(1)]; tensor var_10511 = conv(dilations = var_10511_dilations_0, groups = var_10511_groups_0, pad = var_10511_pad_0, pad_type = var_10511_pad_type_0, strides = var_10511_strides_0, weight = model_model_layers_19_self_attn_v_proj_weight_palettized, x = var_10451)[name = string("op_10511")]; tensor var_10516 = const()[name = string("op_10516"), val = tensor([1, 8, 1, 128])]; tensor var_10517 = reshape(shape = var_10516, x = var_10511)[name = string("op_10517")]; tensor mean_155_axes_0 = const()[name = string("mean_155_axes_0"), val = tensor([-1])]; bool mean_155_keep_dims_0 = const()[name = string("mean_155_keep_dims_0"), val = bool(true)]; tensor mean_155 = reduce_mean(axes = mean_155_axes_0, keep_dims = mean_155_keep_dims_0, x = var_10473)[name = string("mean_155")]; tensor input_347 = sub(x = var_10473, y = mean_155)[name = string("input_347")]; tensor var_10538_axes_0 = const()[name = string("op_10538_axes_0"), val = tensor([-1])]; tensor model_model_layers_19_self_attn_q_norm_weight_to_fp16 = const()[name = string("model_model_layers_19_self_attn_q_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1071286144)))]; fp16 var_10526_to_fp16 = const()[name = string("op_10526_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_10538_cast_fp16 = layer_norm(axes = var_10538_axes_0, epsilon = var_10526_to_fp16, gamma = model_model_layers_19_self_attn_q_norm_weight_to_fp16, x = input_347)[name = string("op_10538_cast_fp16")]; tensor mean_157_axes_0 = const()[name = string("mean_157_axes_0"), val = tensor([-1])]; bool mean_157_keep_dims_0 = const()[name = string("mean_157_keep_dims_0"), val = bool(true)]; tensor mean_157 = reduce_mean(axes = mean_157_axes_0, keep_dims = mean_157_keep_dims_0, x = var_10495)[name = string("mean_157")]; tensor input_349 = sub(x = var_10495, y = mean_157)[name = string("input_349")]; tensor var_10556_axes_0 = const()[name = string("op_10556_axes_0"), val = tensor([-1])]; tensor model_model_layers_19_self_attn_k_norm_weight_to_fp16 = const()[name = string("model_model_layers_19_self_attn_k_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1071286464)))]; fp16 var_10544_to_fp16 = const()[name = string("op_10544_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_10556_cast_fp16 = layer_norm(axes = var_10556_axes_0, epsilon = var_10544_to_fp16, gamma = model_model_layers_19_self_attn_k_norm_weight_to_fp16, x = input_349)[name = string("op_10556_cast_fp16")]; tensor var_10559 = mul(x = var_10538_cast_fp16, y = cos_1_cast_fp16)[name = string("op_10559")]; tensor x1_77_begin_0 = const()[name = string("x1_77_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_77_end_0 = const()[name = string("x1_77_end_0"), val = tensor([1, 16, 1, 64])]; tensor x1_77_end_mask_0 = const()[name = string("x1_77_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_77 = slice_by_index(begin = x1_77_begin_0, end = x1_77_end_0, end_mask = x1_77_end_mask_0, x = var_10538_cast_fp16)[name = string("x1_77")]; tensor x2_77_begin_0 = const()[name = string("x2_77_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_77_end_0 = const()[name = string("x2_77_end_0"), val = tensor([1, 16, 1, 128])]; tensor x2_77_end_mask_0 = const()[name = string("x2_77_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_77 = slice_by_index(begin = x2_77_begin_0, end = x2_77_end_0, end_mask = x2_77_end_mask_0, x = var_10538_cast_fp16)[name = string("x2_77")]; fp16 const_347_promoted = const()[name = string("const_347_promoted"), val = fp16(-0x1p+0)]; tensor var_10580 = mul(x = x2_77, y = const_347_promoted)[name = string("op_10580")]; int32 var_10582 = const()[name = string("op_10582"), val = int32(-1)]; bool var_10583_interleave_0 = const()[name = string("op_10583_interleave_0"), val = bool(false)]; tensor var_10583 = concat(axis = var_10582, interleave = var_10583_interleave_0, values = (var_10580, x1_77))[name = string("op_10583")]; tensor var_10584 = mul(x = var_10583, y = sin_1_cast_fp16)[name = string("op_10584")]; tensor query_states_77 = add(x = var_10559, y = var_10584)[name = string("query_states_77")]; tensor var_10587 = mul(x = var_10556_cast_fp16, y = cos_1_cast_fp16)[name = string("op_10587")]; tensor x1_79_begin_0 = const()[name = string("x1_79_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_79_end_0 = const()[name = string("x1_79_end_0"), val = tensor([1, 8, 1, 64])]; tensor x1_79_end_mask_0 = const()[name = string("x1_79_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_79 = slice_by_index(begin = x1_79_begin_0, end = x1_79_end_0, end_mask = x1_79_end_mask_0, x = var_10556_cast_fp16)[name = string("x1_79")]; tensor x2_79_begin_0 = const()[name = string("x2_79_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_79_end_0 = const()[name = string("x2_79_end_0"), val = tensor([1, 8, 1, 128])]; tensor x2_79_end_mask_0 = const()[name = string("x2_79_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_79 = slice_by_index(begin = x2_79_begin_0, end = x2_79_end_0, end_mask = x2_79_end_mask_0, x = var_10556_cast_fp16)[name = string("x2_79")]; fp16 const_350_promoted = const()[name = string("const_350_promoted"), val = fp16(-0x1p+0)]; tensor var_10608 = mul(x = x2_79, y = const_350_promoted)[name = string("op_10608")]; int32 var_10610 = const()[name = string("op_10610"), val = int32(-1)]; bool var_10611_interleave_0 = const()[name = string("op_10611_interleave_0"), val = bool(false)]; tensor var_10611 = concat(axis = var_10610, interleave = var_10611_interleave_0, values = (var_10608, x1_79))[name = string("op_10611")]; tensor var_10612 = mul(x = var_10611, y = sin_1_cast_fp16)[name = string("op_10612")]; tensor key_states_77 = add(x = var_10587, y = var_10612)[name = string("key_states_77")]; tensor expand_dims_228 = const()[name = string("expand_dims_228"), val = tensor([19])]; tensor expand_dims_229 = const()[name = string("expand_dims_229"), val = tensor([0])]; tensor expand_dims_231 = const()[name = string("expand_dims_231"), val = tensor([0])]; tensor expand_dims_232 = const()[name = string("expand_dims_232"), val = tensor([20])]; int32 concat_154_axis_0 = const()[name = string("concat_154_axis_0"), val = int32(0)]; bool concat_154_interleave_0 = const()[name = string("concat_154_interleave_0"), val = bool(false)]; tensor concat_154 = concat(axis = concat_154_axis_0, interleave = concat_154_interleave_0, values = (expand_dims_228, expand_dims_229, current_pos, expand_dims_231))[name = string("concat_154")]; tensor concat_155_values1_0 = const()[name = string("concat_155_values1_0"), val = tensor([0])]; tensor concat_155_values3_0 = const()[name = string("concat_155_values3_0"), val = tensor([0])]; int32 concat_155_axis_0 = const()[name = string("concat_155_axis_0"), val = int32(0)]; bool concat_155_interleave_0 = const()[name = string("concat_155_interleave_0"), val = bool(false)]; tensor concat_155 = concat(axis = concat_155_axis_0, interleave = concat_155_interleave_0, values = (expand_dims_232, concat_155_values1_0, var_1725, concat_155_values3_0))[name = string("concat_155")]; tensor model_model_kv_cache_0_internal_tensor_assign_39_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_39_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_39_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_39_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_39_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_39_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_39_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_39_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_39_cast_fp16 = slice_update(begin = concat_154, begin_mask = model_model_kv_cache_0_internal_tensor_assign_39_begin_mask_0, end = concat_155, end_mask = model_model_kv_cache_0_internal_tensor_assign_39_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_39_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_39_stride_0, update = key_states_77, x = coreml_update_state_93)[name = string("model_model_kv_cache_0_internal_tensor_assign_39_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_39_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_38_write_state")]; tensor coreml_update_state_94 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_38")]; tensor expand_dims_234 = const()[name = string("expand_dims_234"), val = tensor([47])]; tensor expand_dims_235 = const()[name = string("expand_dims_235"), val = tensor([0])]; tensor expand_dims_237 = const()[name = string("expand_dims_237"), val = tensor([0])]; tensor expand_dims_238 = const()[name = string("expand_dims_238"), val = tensor([48])]; int32 concat_158_axis_0 = const()[name = string("concat_158_axis_0"), val = int32(0)]; bool concat_158_interleave_0 = const()[name = string("concat_158_interleave_0"), val = bool(false)]; tensor concat_158 = concat(axis = concat_158_axis_0, interleave = concat_158_interleave_0, values = (expand_dims_234, expand_dims_235, current_pos, expand_dims_237))[name = string("concat_158")]; tensor concat_159_values1_0 = const()[name = string("concat_159_values1_0"), val = tensor([0])]; tensor concat_159_values3_0 = const()[name = string("concat_159_values3_0"), val = tensor([0])]; int32 concat_159_axis_0 = const()[name = string("concat_159_axis_0"), val = int32(0)]; bool concat_159_interleave_0 = const()[name = string("concat_159_interleave_0"), val = bool(false)]; tensor concat_159 = concat(axis = concat_159_axis_0, interleave = concat_159_interleave_0, values = (expand_dims_238, concat_159_values1_0, var_1725, concat_159_values3_0))[name = string("concat_159")]; tensor model_model_kv_cache_0_internal_tensor_assign_40_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_40_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_40_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_40_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_40_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_40_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_40_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_40_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_40_cast_fp16 = slice_update(begin = concat_158, begin_mask = model_model_kv_cache_0_internal_tensor_assign_40_begin_mask_0, end = concat_159, end_mask = model_model_kv_cache_0_internal_tensor_assign_40_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_40_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_40_stride_0, update = var_10517, x = coreml_update_state_94)[name = string("model_model_kv_cache_0_internal_tensor_assign_40_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_40_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_39_write_state")]; tensor coreml_update_state_95 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_39")]; tensor var_10667_begin_0 = const()[name = string("op_10667_begin_0"), val = tensor([19, 0, 0, 0])]; tensor var_10667_end_0 = const()[name = string("op_10667_end_0"), val = tensor([20, 8, 1024, 128])]; tensor var_10667_end_mask_0 = const()[name = string("op_10667_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_10667_cast_fp16 = slice_by_index(begin = var_10667_begin_0, end = var_10667_end_0, end_mask = var_10667_end_mask_0, x = coreml_update_state_95)[name = string("op_10667_cast_fp16")]; tensor K_layer_cache_39_axes_0 = const()[name = string("K_layer_cache_39_axes_0"), val = tensor([0])]; tensor K_layer_cache_39_cast_fp16 = squeeze(axes = K_layer_cache_39_axes_0, x = var_10667_cast_fp16)[name = string("K_layer_cache_39_cast_fp16")]; tensor var_10674_begin_0 = const()[name = string("op_10674_begin_0"), val = tensor([47, 0, 0, 0])]; tensor var_10674_end_0 = const()[name = string("op_10674_end_0"), val = tensor([48, 8, 1024, 128])]; tensor var_10674_end_mask_0 = const()[name = string("op_10674_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_10674_cast_fp16 = slice_by_index(begin = var_10674_begin_0, end = var_10674_end_0, end_mask = var_10674_end_mask_0, x = coreml_update_state_95)[name = string("op_10674_cast_fp16")]; tensor V_layer_cache_39_axes_0 = const()[name = string("V_layer_cache_39_axes_0"), val = tensor([0])]; tensor V_layer_cache_39_cast_fp16 = squeeze(axes = V_layer_cache_39_axes_0, x = var_10674_cast_fp16)[name = string("V_layer_cache_39_cast_fp16")]; tensor x_387_axes_0 = const()[name = string("x_387_axes_0"), val = tensor([1])]; tensor x_387_cast_fp16 = expand_dims(axes = x_387_axes_0, x = K_layer_cache_39_cast_fp16)[name = string("x_387_cast_fp16")]; tensor var_10711 = const()[name = string("op_10711"), val = tensor([1, 2, 1, 1])]; tensor x_389_cast_fp16 = tile(reps = var_10711, x = x_387_cast_fp16)[name = string("x_389_cast_fp16")]; tensor var_10723 = const()[name = string("op_10723"), val = tensor([1, -1, 1024, 128])]; tensor key_states_79_cast_fp16 = reshape(shape = var_10723, x = x_389_cast_fp16)[name = string("key_states_79_cast_fp16")]; tensor x_393_axes_0 = const()[name = string("x_393_axes_0"), val = tensor([1])]; tensor x_393_cast_fp16 = expand_dims(axes = x_393_axes_0, x = V_layer_cache_39_cast_fp16)[name = string("x_393_cast_fp16")]; tensor var_10731 = const()[name = string("op_10731"), val = tensor([1, 2, 1, 1])]; tensor x_395_cast_fp16 = tile(reps = var_10731, x = x_393_cast_fp16)[name = string("x_395_cast_fp16")]; tensor var_10743 = const()[name = string("op_10743"), val = tensor([1, -1, 1024, 128])]; tensor value_states_117_cast_fp16 = reshape(shape = var_10743, x = x_395_cast_fp16)[name = string("value_states_117_cast_fp16")]; bool var_10758_transpose_x_1 = const()[name = string("op_10758_transpose_x_1"), val = bool(false)]; bool var_10758_transpose_y_1 = const()[name = string("op_10758_transpose_y_1"), val = bool(true)]; tensor var_10758 = matmul(transpose_x = var_10758_transpose_x_1, transpose_y = var_10758_transpose_y_1, x = query_states_77, y = key_states_79_cast_fp16)[name = string("op_10758")]; fp16 var_10759_to_fp16 = const()[name = string("op_10759_to_fp16"), val = fp16(0x1.6ap-4)]; tensor attn_weights_115_cast_fp16 = mul(x = var_10758, y = var_10759_to_fp16)[name = string("attn_weights_115_cast_fp16")]; tensor attn_weights_117_cast_fp16 = add(x = attn_weights_115_cast_fp16, y = causal_mask)[name = string("attn_weights_117_cast_fp16")]; int32 var_10794 = const()[name = string("op_10794"), val = int32(-1)]; tensor attn_weights_119_cast_fp16 = softmax(axis = var_10794, x = attn_weights_117_cast_fp16)[name = string("attn_weights_119_cast_fp16")]; bool attn_output_191_transpose_x_0 = const()[name = string("attn_output_191_transpose_x_0"), val = bool(false)]; bool attn_output_191_transpose_y_0 = const()[name = string("attn_output_191_transpose_y_0"), val = bool(false)]; tensor attn_output_191_cast_fp16 = matmul(transpose_x = attn_output_191_transpose_x_0, transpose_y = attn_output_191_transpose_y_0, x = attn_weights_119_cast_fp16, y = value_states_117_cast_fp16)[name = string("attn_output_191_cast_fp16")]; tensor var_10805_perm_0 = const()[name = string("op_10805_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_10809 = const()[name = string("op_10809"), val = tensor([1, 1, 2048])]; tensor var_10805_cast_fp16 = transpose(perm = var_10805_perm_0, x = attn_output_191_cast_fp16)[name = string("transpose_52")]; tensor attn_output_195_cast_fp16 = reshape(shape = var_10809, x = var_10805_cast_fp16)[name = string("attn_output_195_cast_fp16")]; tensor var_10814 = const()[name = string("op_10814"), val = tensor([0, 2, 1])]; string var_10830_pad_type_0 = const()[name = string("op_10830_pad_type_0"), val = string("valid")]; int32 var_10830_groups_0 = const()[name = string("op_10830_groups_0"), val = int32(1)]; tensor var_10830_strides_0 = const()[name = string("op_10830_strides_0"), val = tensor([1])]; tensor var_10830_pad_0 = const()[name = string("op_10830_pad_0"), val = tensor([0, 0])]; tensor var_10830_dilations_0 = const()[name = string("op_10830_dilations_0"), val = tensor([1])]; tensor squeeze_19_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1071286784))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1074432576))))[name = string("squeeze_19_cast_fp16_to_fp32_to_fp16_palettized")]; tensor var_10815_cast_fp16 = transpose(perm = var_10814, x = attn_output_195_cast_fp16)[name = string("transpose_51")]; tensor var_10830_cast_fp16 = conv(dilations = var_10830_dilations_0, groups = var_10830_groups_0, pad = var_10830_pad_0, pad_type = var_10830_pad_type_0, strides = var_10830_strides_0, weight = squeeze_19_cast_fp16_to_fp32_to_fp16_palettized, x = var_10815_cast_fp16)[name = string("op_10830_cast_fp16")]; tensor var_10834 = const()[name = string("op_10834"), val = tensor([0, 2, 1])]; tensor attn_output_199_cast_fp16 = transpose(perm = var_10834, x = var_10830_cast_fp16)[name = string("transpose_50")]; tensor hidden_states_119_cast_fp16 = add(x = hidden_states_115_cast_fp16, y = attn_output_199_cast_fp16)[name = string("hidden_states_119_cast_fp16")]; tensor mean_159_axes_0 = const()[name = string("mean_159_axes_0"), val = tensor([-1])]; bool mean_159_keep_dims_0 = const()[name = string("mean_159_keep_dims_0"), val = bool(true)]; tensor mean_159_cast_fp16 = reduce_mean(axes = mean_159_axes_0, keep_dims = mean_159_keep_dims_0, x = hidden_states_119_cast_fp16)[name = string("mean_159_cast_fp16")]; tensor input_353_cast_fp16 = sub(x = hidden_states_119_cast_fp16, y = mean_159_cast_fp16)[name = string("input_353_cast_fp16")]; tensor var_10853_axes_0 = const()[name = string("op_10853_axes_0"), val = tensor([-1])]; tensor model_model_layers_19_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_19_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1074465408)))]; fp16 var_10841_to_fp16 = const()[name = string("op_10841_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_10853_cast_fp16 = layer_norm(axes = var_10853_axes_0, epsilon = var_10841_to_fp16, gamma = model_model_layers_19_post_attention_layernorm_weight_to_fp16, x = input_353_cast_fp16)[name = string("op_10853_cast_fp16")]; tensor var_10867 = const()[name = string("op_10867"), val = tensor([0, 2, 1])]; tensor input_355_axes_0 = const()[name = string("input_355_axes_0"), val = tensor([2])]; tensor var_10868 = transpose(perm = var_10867, x = var_10853_cast_fp16)[name = string("transpose_49")]; tensor input_355 = expand_dims(axes = input_355_axes_0, x = var_10868)[name = string("input_355")]; string input_357_pad_type_0 = const()[name = string("input_357_pad_type_0"), val = string("valid")]; tensor input_357_strides_0 = const()[name = string("input_357_strides_0"), val = tensor([1, 1])]; tensor input_357_pad_0 = const()[name = string("input_357_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_357_dilations_0 = const()[name = string("input_357_dilations_0"), val = tensor([1, 1])]; int32 input_357_groups_0 = const()[name = string("input_357_groups_0"), val = int32(1)]; tensor input_357 = conv(dilations = input_357_dilations_0, groups = input_357_groups_0, pad = input_357_pad_0, pad_type = input_357_pad_type_0, strides = input_357_strides_0, weight = model_model_layers_19_mlp_gate_proj_weight_palettized, x = input_355)[name = string("input_357")]; string b_39_pad_type_0 = const()[name = string("b_39_pad_type_0"), val = string("valid")]; tensor b_39_strides_0 = const()[name = string("b_39_strides_0"), val = tensor([1, 1])]; tensor b_39_pad_0 = const()[name = string("b_39_pad_0"), val = tensor([0, 0, 0, 0])]; tensor b_39_dilations_0 = const()[name = string("b_39_dilations_0"), val = tensor([1, 1])]; int32 b_39_groups_0 = const()[name = string("b_39_groups_0"), val = int32(1)]; tensor b_39 = conv(dilations = b_39_dilations_0, groups = b_39_groups_0, pad = b_39_pad_0, pad_type = b_39_pad_type_0, strides = b_39_strides_0, weight = model_model_layers_19_mlp_up_proj_weight_palettized, x = input_355)[name = string("b_39")]; tensor c_39 = silu(x = input_357)[name = string("c_39")]; tensor input_359 = mul(x = c_39, y = b_39)[name = string("input_359")]; string e_39_pad_type_0 = const()[name = string("e_39_pad_type_0"), val = string("valid")]; tensor e_39_strides_0 = const()[name = string("e_39_strides_0"), val = tensor([1, 1])]; tensor e_39_pad_0 = const()[name = string("e_39_pad_0"), val = tensor([0, 0, 0, 0])]; tensor e_39_dilations_0 = const()[name = string("e_39_dilations_0"), val = tensor([1, 1])]; int32 e_39_groups_0 = const()[name = string("e_39_groups_0"), val = int32(1)]; tensor e_39 = conv(dilations = e_39_dilations_0, groups = e_39_groups_0, pad = e_39_pad_0, pad_type = e_39_pad_type_0, strides = e_39_strides_0, weight = model_model_layers_19_mlp_down_proj_weight_palettized, x = input_359)[name = string("e_39")]; tensor var_10890_axes_0 = const()[name = string("op_10890_axes_0"), val = tensor([2])]; tensor var_10890 = squeeze(axes = var_10890_axes_0, x = e_39)[name = string("op_10890")]; tensor var_10891 = const()[name = string("op_10891"), val = tensor([0, 2, 1])]; tensor var_10892 = transpose(perm = var_10891, x = var_10890)[name = string("transpose_48")]; tensor hidden_states_121_cast_fp16 = add(x = hidden_states_119_cast_fp16, y = var_10892)[name = string("hidden_states_121_cast_fp16")]; tensor mean_161_axes_0 = const()[name = string("mean_161_axes_0"), val = tensor([-1])]; bool mean_161_keep_dims_0 = const()[name = string("mean_161_keep_dims_0"), val = bool(true)]; tensor mean_161_cast_fp16 = reduce_mean(axes = mean_161_axes_0, keep_dims = mean_161_keep_dims_0, x = hidden_states_121_cast_fp16)[name = string("mean_161_cast_fp16")]; tensor input_361_cast_fp16 = sub(x = hidden_states_121_cast_fp16, y = mean_161_cast_fp16)[name = string("input_361_cast_fp16")]; tensor var_10910_axes_0 = const()[name = string("op_10910_axes_0"), val = tensor([-1])]; tensor model_model_layers_20_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_20_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1074469568)))]; fp16 var_10898_to_fp16 = const()[name = string("op_10898_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_10910_cast_fp16 = layer_norm(axes = var_10910_axes_0, epsilon = var_10898_to_fp16, gamma = model_model_layers_20_input_layernorm_weight_to_fp16, x = input_361_cast_fp16)[name = string("op_10910_cast_fp16")]; tensor var_10916 = const()[name = string("op_10916"), val = tensor([0, 2, 1])]; tensor var_10919_axes_0 = const()[name = string("op_10919_axes_0"), val = tensor([2])]; tensor var_10917 = transpose(perm = var_10916, x = var_10910_cast_fp16)[name = string("transpose_47")]; tensor var_10919 = expand_dims(axes = var_10919_axes_0, x = var_10917)[name = string("op_10919")]; string var_10935_pad_type_0 = const()[name = string("op_10935_pad_type_0"), val = string("valid")]; tensor var_10935_strides_0 = const()[name = string("op_10935_strides_0"), val = tensor([1, 1])]; tensor var_10935_pad_0 = const()[name = string("op_10935_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_10935_dilations_0 = const()[name = string("op_10935_dilations_0"), val = tensor([1, 1])]; int32 var_10935_groups_0 = const()[name = string("op_10935_groups_0"), val = int32(1)]; tensor var_10935 = conv(dilations = var_10935_dilations_0, groups = var_10935_groups_0, pad = var_10935_pad_0, pad_type = var_10935_pad_type_0, strides = var_10935_strides_0, weight = model_model_layers_20_self_attn_q_proj_weight_palettized, x = var_10919)[name = string("op_10935")]; tensor var_10940 = const()[name = string("op_10940"), val = tensor([1, 16, 1, 128])]; tensor var_10941 = reshape(shape = var_10940, x = var_10935)[name = string("op_10941")]; string var_10957_pad_type_0 = const()[name = string("op_10957_pad_type_0"), val = string("valid")]; tensor var_10957_strides_0 = const()[name = string("op_10957_strides_0"), val = tensor([1, 1])]; tensor var_10957_pad_0 = const()[name = string("op_10957_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_10957_dilations_0 = const()[name = string("op_10957_dilations_0"), val = tensor([1, 1])]; int32 var_10957_groups_0 = const()[name = string("op_10957_groups_0"), val = int32(1)]; tensor var_10957 = conv(dilations = var_10957_dilations_0, groups = var_10957_groups_0, pad = var_10957_pad_0, pad_type = var_10957_pad_type_0, strides = var_10957_strides_0, weight = model_model_layers_20_self_attn_k_proj_weight_palettized, x = var_10919)[name = string("op_10957")]; tensor var_10962 = const()[name = string("op_10962"), val = tensor([1, 8, 1, 128])]; tensor var_10963 = reshape(shape = var_10962, x = var_10957)[name = string("op_10963")]; string var_10979_pad_type_0 = const()[name = string("op_10979_pad_type_0"), val = string("valid")]; tensor var_10979_strides_0 = const()[name = string("op_10979_strides_0"), val = tensor([1, 1])]; tensor var_10979_pad_0 = const()[name = string("op_10979_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_10979_dilations_0 = const()[name = string("op_10979_dilations_0"), val = tensor([1, 1])]; int32 var_10979_groups_0 = const()[name = string("op_10979_groups_0"), val = int32(1)]; tensor var_10979 = conv(dilations = var_10979_dilations_0, groups = var_10979_groups_0, pad = var_10979_pad_0, pad_type = var_10979_pad_type_0, strides = var_10979_strides_0, weight = model_model_layers_20_self_attn_v_proj_weight_palettized, x = var_10919)[name = string("op_10979")]; tensor var_10984 = const()[name = string("op_10984"), val = tensor([1, 8, 1, 128])]; tensor var_10985 = reshape(shape = var_10984, x = var_10979)[name = string("op_10985")]; tensor mean_163_axes_0 = const()[name = string("mean_163_axes_0"), val = tensor([-1])]; bool mean_163_keep_dims_0 = const()[name = string("mean_163_keep_dims_0"), val = bool(true)]; tensor mean_163 = reduce_mean(axes = mean_163_axes_0, keep_dims = mean_163_keep_dims_0, x = var_10941)[name = string("mean_163")]; tensor input_365 = sub(x = var_10941, y = mean_163)[name = string("input_365")]; tensor var_11006_axes_0 = const()[name = string("op_11006_axes_0"), val = tensor([-1])]; tensor model_model_layers_20_self_attn_q_norm_weight_to_fp16 = const()[name = string("model_model_layers_20_self_attn_q_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1074473728)))]; fp16 var_10994_to_fp16 = const()[name = string("op_10994_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_11006_cast_fp16 = layer_norm(axes = var_11006_axes_0, epsilon = var_10994_to_fp16, gamma = model_model_layers_20_self_attn_q_norm_weight_to_fp16, x = input_365)[name = string("op_11006_cast_fp16")]; tensor mean_165_axes_0 = const()[name = string("mean_165_axes_0"), val = tensor([-1])]; bool mean_165_keep_dims_0 = const()[name = string("mean_165_keep_dims_0"), val = bool(true)]; tensor mean_165 = reduce_mean(axes = mean_165_axes_0, keep_dims = mean_165_keep_dims_0, x = var_10963)[name = string("mean_165")]; tensor input_367 = sub(x = var_10963, y = mean_165)[name = string("input_367")]; tensor var_11024_axes_0 = const()[name = string("op_11024_axes_0"), val = tensor([-1])]; tensor model_model_layers_20_self_attn_k_norm_weight_to_fp16 = const()[name = string("model_model_layers_20_self_attn_k_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1074474048)))]; fp16 var_11012_to_fp16 = const()[name = string("op_11012_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_11024_cast_fp16 = layer_norm(axes = var_11024_axes_0, epsilon = var_11012_to_fp16, gamma = model_model_layers_20_self_attn_k_norm_weight_to_fp16, x = input_367)[name = string("op_11024_cast_fp16")]; tensor var_11027 = mul(x = var_11006_cast_fp16, y = cos_1_cast_fp16)[name = string("op_11027")]; tensor x1_81_begin_0 = const()[name = string("x1_81_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_81_end_0 = const()[name = string("x1_81_end_0"), val = tensor([1, 16, 1, 64])]; tensor x1_81_end_mask_0 = const()[name = string("x1_81_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_81 = slice_by_index(begin = x1_81_begin_0, end = x1_81_end_0, end_mask = x1_81_end_mask_0, x = var_11006_cast_fp16)[name = string("x1_81")]; tensor x2_81_begin_0 = const()[name = string("x2_81_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_81_end_0 = const()[name = string("x2_81_end_0"), val = tensor([1, 16, 1, 128])]; tensor x2_81_end_mask_0 = const()[name = string("x2_81_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_81 = slice_by_index(begin = x2_81_begin_0, end = x2_81_end_0, end_mask = x2_81_end_mask_0, x = var_11006_cast_fp16)[name = string("x2_81")]; fp16 const_365_promoted = const()[name = string("const_365_promoted"), val = fp16(-0x1p+0)]; tensor var_11048 = mul(x = x2_81, y = const_365_promoted)[name = string("op_11048")]; int32 var_11050 = const()[name = string("op_11050"), val = int32(-1)]; bool var_11051_interleave_0 = const()[name = string("op_11051_interleave_0"), val = bool(false)]; tensor var_11051 = concat(axis = var_11050, interleave = var_11051_interleave_0, values = (var_11048, x1_81))[name = string("op_11051")]; tensor var_11052 = mul(x = var_11051, y = sin_1_cast_fp16)[name = string("op_11052")]; tensor query_states_81 = add(x = var_11027, y = var_11052)[name = string("query_states_81")]; tensor var_11055 = mul(x = var_11024_cast_fp16, y = cos_1_cast_fp16)[name = string("op_11055")]; tensor x1_83_begin_0 = const()[name = string("x1_83_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_83_end_0 = const()[name = string("x1_83_end_0"), val = tensor([1, 8, 1, 64])]; tensor x1_83_end_mask_0 = const()[name = string("x1_83_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_83 = slice_by_index(begin = x1_83_begin_0, end = x1_83_end_0, end_mask = x1_83_end_mask_0, x = var_11024_cast_fp16)[name = string("x1_83")]; tensor x2_83_begin_0 = const()[name = string("x2_83_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_83_end_0 = const()[name = string("x2_83_end_0"), val = tensor([1, 8, 1, 128])]; tensor x2_83_end_mask_0 = const()[name = string("x2_83_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_83 = slice_by_index(begin = x2_83_begin_0, end = x2_83_end_0, end_mask = x2_83_end_mask_0, x = var_11024_cast_fp16)[name = string("x2_83")]; fp16 const_368_promoted = const()[name = string("const_368_promoted"), val = fp16(-0x1p+0)]; tensor var_11076 = mul(x = x2_83, y = const_368_promoted)[name = string("op_11076")]; int32 var_11078 = const()[name = string("op_11078"), val = int32(-1)]; bool var_11079_interleave_0 = const()[name = string("op_11079_interleave_0"), val = bool(false)]; tensor var_11079 = concat(axis = var_11078, interleave = var_11079_interleave_0, values = (var_11076, x1_83))[name = string("op_11079")]; tensor var_11080 = mul(x = var_11079, y = sin_1_cast_fp16)[name = string("op_11080")]; tensor key_states_81 = add(x = var_11055, y = var_11080)[name = string("key_states_81")]; tensor expand_dims_240 = const()[name = string("expand_dims_240"), val = tensor([20])]; tensor expand_dims_241 = const()[name = string("expand_dims_241"), val = tensor([0])]; tensor expand_dims_243 = const()[name = string("expand_dims_243"), val = tensor([0])]; tensor expand_dims_244 = const()[name = string("expand_dims_244"), val = tensor([21])]; int32 concat_162_axis_0 = const()[name = string("concat_162_axis_0"), val = int32(0)]; bool concat_162_interleave_0 = const()[name = string("concat_162_interleave_0"), val = bool(false)]; tensor concat_162 = concat(axis = concat_162_axis_0, interleave = concat_162_interleave_0, values = (expand_dims_240, expand_dims_241, current_pos, expand_dims_243))[name = string("concat_162")]; tensor concat_163_values1_0 = const()[name = string("concat_163_values1_0"), val = tensor([0])]; tensor concat_163_values3_0 = const()[name = string("concat_163_values3_0"), val = tensor([0])]; int32 concat_163_axis_0 = const()[name = string("concat_163_axis_0"), val = int32(0)]; bool concat_163_interleave_0 = const()[name = string("concat_163_interleave_0"), val = bool(false)]; tensor concat_163 = concat(axis = concat_163_axis_0, interleave = concat_163_interleave_0, values = (expand_dims_244, concat_163_values1_0, var_1725, concat_163_values3_0))[name = string("concat_163")]; tensor model_model_kv_cache_0_internal_tensor_assign_41_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_41_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_41_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_41_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_41_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_41_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_41_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_41_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_41_cast_fp16 = slice_update(begin = concat_162, begin_mask = model_model_kv_cache_0_internal_tensor_assign_41_begin_mask_0, end = concat_163, end_mask = model_model_kv_cache_0_internal_tensor_assign_41_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_41_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_41_stride_0, update = key_states_81, x = coreml_update_state_95)[name = string("model_model_kv_cache_0_internal_tensor_assign_41_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_41_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_40_write_state")]; tensor coreml_update_state_96 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_40")]; tensor expand_dims_246 = const()[name = string("expand_dims_246"), val = tensor([48])]; tensor expand_dims_247 = const()[name = string("expand_dims_247"), val = tensor([0])]; tensor expand_dims_249 = const()[name = string("expand_dims_249"), val = tensor([0])]; tensor expand_dims_250 = const()[name = string("expand_dims_250"), val = tensor([49])]; int32 concat_166_axis_0 = const()[name = string("concat_166_axis_0"), val = int32(0)]; bool concat_166_interleave_0 = const()[name = string("concat_166_interleave_0"), val = bool(false)]; tensor concat_166 = concat(axis = concat_166_axis_0, interleave = concat_166_interleave_0, values = (expand_dims_246, expand_dims_247, current_pos, expand_dims_249))[name = string("concat_166")]; tensor concat_167_values1_0 = const()[name = string("concat_167_values1_0"), val = tensor([0])]; tensor concat_167_values3_0 = const()[name = string("concat_167_values3_0"), val = tensor([0])]; int32 concat_167_axis_0 = const()[name = string("concat_167_axis_0"), val = int32(0)]; bool concat_167_interleave_0 = const()[name = string("concat_167_interleave_0"), val = bool(false)]; tensor concat_167 = concat(axis = concat_167_axis_0, interleave = concat_167_interleave_0, values = (expand_dims_250, concat_167_values1_0, var_1725, concat_167_values3_0))[name = string("concat_167")]; tensor model_model_kv_cache_0_internal_tensor_assign_42_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_42_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_42_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_42_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_42_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_42_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_42_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_42_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_42_cast_fp16 = slice_update(begin = concat_166, begin_mask = model_model_kv_cache_0_internal_tensor_assign_42_begin_mask_0, end = concat_167, end_mask = model_model_kv_cache_0_internal_tensor_assign_42_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_42_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_42_stride_0, update = var_10985, x = coreml_update_state_96)[name = string("model_model_kv_cache_0_internal_tensor_assign_42_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_42_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_41_write_state")]; tensor coreml_update_state_97 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_41")]; tensor var_11135_begin_0 = const()[name = string("op_11135_begin_0"), val = tensor([20, 0, 0, 0])]; tensor var_11135_end_0 = const()[name = string("op_11135_end_0"), val = tensor([21, 8, 1024, 128])]; tensor var_11135_end_mask_0 = const()[name = string("op_11135_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_11135_cast_fp16 = slice_by_index(begin = var_11135_begin_0, end = var_11135_end_0, end_mask = var_11135_end_mask_0, x = coreml_update_state_97)[name = string("op_11135_cast_fp16")]; tensor K_layer_cache_41_axes_0 = const()[name = string("K_layer_cache_41_axes_0"), val = tensor([0])]; tensor K_layer_cache_41_cast_fp16 = squeeze(axes = K_layer_cache_41_axes_0, x = var_11135_cast_fp16)[name = string("K_layer_cache_41_cast_fp16")]; tensor var_11142_begin_0 = const()[name = string("op_11142_begin_0"), val = tensor([48, 0, 0, 0])]; tensor var_11142_end_0 = const()[name = string("op_11142_end_0"), val = tensor([49, 8, 1024, 128])]; tensor var_11142_end_mask_0 = const()[name = string("op_11142_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_11142_cast_fp16 = slice_by_index(begin = var_11142_begin_0, end = var_11142_end_0, end_mask = var_11142_end_mask_0, x = coreml_update_state_97)[name = string("op_11142_cast_fp16")]; tensor V_layer_cache_41_axes_0 = const()[name = string("V_layer_cache_41_axes_0"), val = tensor([0])]; tensor V_layer_cache_41_cast_fp16 = squeeze(axes = V_layer_cache_41_axes_0, x = var_11142_cast_fp16)[name = string("V_layer_cache_41_cast_fp16")]; tensor x_407_axes_0 = const()[name = string("x_407_axes_0"), val = tensor([1])]; tensor x_407_cast_fp16 = expand_dims(axes = x_407_axes_0, x = K_layer_cache_41_cast_fp16)[name = string("x_407_cast_fp16")]; tensor var_11179 = const()[name = string("op_11179"), val = tensor([1, 2, 1, 1])]; tensor x_409_cast_fp16 = tile(reps = var_11179, x = x_407_cast_fp16)[name = string("x_409_cast_fp16")]; tensor var_11191 = const()[name = string("op_11191"), val = tensor([1, -1, 1024, 128])]; tensor key_states_83_cast_fp16 = reshape(shape = var_11191, x = x_409_cast_fp16)[name = string("key_states_83_cast_fp16")]; tensor x_413_axes_0 = const()[name = string("x_413_axes_0"), val = tensor([1])]; tensor x_413_cast_fp16 = expand_dims(axes = x_413_axes_0, x = V_layer_cache_41_cast_fp16)[name = string("x_413_cast_fp16")]; tensor var_11199 = const()[name = string("op_11199"), val = tensor([1, 2, 1, 1])]; tensor x_415_cast_fp16 = tile(reps = var_11199, x = x_413_cast_fp16)[name = string("x_415_cast_fp16")]; tensor var_11211 = const()[name = string("op_11211"), val = tensor([1, -1, 1024, 128])]; tensor value_states_123_cast_fp16 = reshape(shape = var_11211, x = x_415_cast_fp16)[name = string("value_states_123_cast_fp16")]; bool var_11226_transpose_x_1 = const()[name = string("op_11226_transpose_x_1"), val = bool(false)]; bool var_11226_transpose_y_1 = const()[name = string("op_11226_transpose_y_1"), val = bool(true)]; tensor var_11226 = matmul(transpose_x = var_11226_transpose_x_1, transpose_y = var_11226_transpose_y_1, x = query_states_81, y = key_states_83_cast_fp16)[name = string("op_11226")]; fp16 var_11227_to_fp16 = const()[name = string("op_11227_to_fp16"), val = fp16(0x1.6ap-4)]; tensor attn_weights_121_cast_fp16 = mul(x = var_11226, y = var_11227_to_fp16)[name = string("attn_weights_121_cast_fp16")]; tensor attn_weights_123_cast_fp16 = add(x = attn_weights_121_cast_fp16, y = causal_mask)[name = string("attn_weights_123_cast_fp16")]; int32 var_11262 = const()[name = string("op_11262"), val = int32(-1)]; tensor attn_weights_125_cast_fp16 = softmax(axis = var_11262, x = attn_weights_123_cast_fp16)[name = string("attn_weights_125_cast_fp16")]; bool attn_output_201_transpose_x_0 = const()[name = string("attn_output_201_transpose_x_0"), val = bool(false)]; bool attn_output_201_transpose_y_0 = const()[name = string("attn_output_201_transpose_y_0"), val = bool(false)]; tensor attn_output_201_cast_fp16 = matmul(transpose_x = attn_output_201_transpose_x_0, transpose_y = attn_output_201_transpose_y_0, x = attn_weights_125_cast_fp16, y = value_states_123_cast_fp16)[name = string("attn_output_201_cast_fp16")]; tensor var_11273_perm_0 = const()[name = string("op_11273_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_11277 = const()[name = string("op_11277"), val = tensor([1, 1, 2048])]; tensor var_11273_cast_fp16 = transpose(perm = var_11273_perm_0, x = attn_output_201_cast_fp16)[name = string("transpose_46")]; tensor attn_output_205_cast_fp16 = reshape(shape = var_11277, x = var_11273_cast_fp16)[name = string("attn_output_205_cast_fp16")]; tensor var_11282 = const()[name = string("op_11282"), val = tensor([0, 2, 1])]; string var_11298_pad_type_0 = const()[name = string("op_11298_pad_type_0"), val = string("valid")]; int32 var_11298_groups_0 = const()[name = string("op_11298_groups_0"), val = int32(1)]; tensor var_11298_strides_0 = const()[name = string("op_11298_strides_0"), val = tensor([1])]; tensor var_11298_pad_0 = const()[name = string("op_11298_pad_0"), val = tensor([0, 0])]; tensor var_11298_dilations_0 = const()[name = string("op_11298_dilations_0"), val = tensor([1])]; tensor squeeze_20_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1074474368))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1077620160))))[name = string("squeeze_20_cast_fp16_to_fp32_to_fp16_palettized")]; tensor var_11283_cast_fp16 = transpose(perm = var_11282, x = attn_output_205_cast_fp16)[name = string("transpose_45")]; tensor var_11298_cast_fp16 = conv(dilations = var_11298_dilations_0, groups = var_11298_groups_0, pad = var_11298_pad_0, pad_type = var_11298_pad_type_0, strides = var_11298_strides_0, weight = squeeze_20_cast_fp16_to_fp32_to_fp16_palettized, x = var_11283_cast_fp16)[name = string("op_11298_cast_fp16")]; tensor var_11302 = const()[name = string("op_11302"), val = tensor([0, 2, 1])]; tensor attn_output_209_cast_fp16 = transpose(perm = var_11302, x = var_11298_cast_fp16)[name = string("transpose_44")]; tensor hidden_states_125_cast_fp16 = add(x = hidden_states_121_cast_fp16, y = attn_output_209_cast_fp16)[name = string("hidden_states_125_cast_fp16")]; tensor mean_167_axes_0 = const()[name = string("mean_167_axes_0"), val = tensor([-1])]; bool mean_167_keep_dims_0 = const()[name = string("mean_167_keep_dims_0"), val = bool(true)]; tensor mean_167_cast_fp16 = reduce_mean(axes = mean_167_axes_0, keep_dims = mean_167_keep_dims_0, x = hidden_states_125_cast_fp16)[name = string("mean_167_cast_fp16")]; tensor input_371_cast_fp16 = sub(x = hidden_states_125_cast_fp16, y = mean_167_cast_fp16)[name = string("input_371_cast_fp16")]; tensor var_11321_axes_0 = const()[name = string("op_11321_axes_0"), val = tensor([-1])]; tensor model_model_layers_20_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_20_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1077652992)))]; fp16 var_11309_to_fp16 = const()[name = string("op_11309_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_11321_cast_fp16 = layer_norm(axes = var_11321_axes_0, epsilon = var_11309_to_fp16, gamma = model_model_layers_20_post_attention_layernorm_weight_to_fp16, x = input_371_cast_fp16)[name = string("op_11321_cast_fp16")]; tensor var_11335 = const()[name = string("op_11335"), val = tensor([0, 2, 1])]; tensor input_373_axes_0 = const()[name = string("input_373_axes_0"), val = tensor([2])]; tensor var_11336 = transpose(perm = var_11335, x = var_11321_cast_fp16)[name = string("transpose_43")]; tensor input_373 = expand_dims(axes = input_373_axes_0, x = var_11336)[name = string("input_373")]; string input_375_pad_type_0 = const()[name = string("input_375_pad_type_0"), val = string("valid")]; tensor input_375_strides_0 = const()[name = string("input_375_strides_0"), val = tensor([1, 1])]; tensor input_375_pad_0 = const()[name = string("input_375_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_375_dilations_0 = const()[name = string("input_375_dilations_0"), val = tensor([1, 1])]; int32 input_375_groups_0 = const()[name = string("input_375_groups_0"), val = int32(1)]; tensor input_375 = conv(dilations = input_375_dilations_0, groups = input_375_groups_0, pad = input_375_pad_0, pad_type = input_375_pad_type_0, strides = input_375_strides_0, weight = model_model_layers_20_mlp_gate_proj_weight_palettized, x = input_373)[name = string("input_375")]; string b_41_pad_type_0 = const()[name = string("b_41_pad_type_0"), val = string("valid")]; tensor b_41_strides_0 = const()[name = string("b_41_strides_0"), val = tensor([1, 1])]; tensor b_41_pad_0 = const()[name = string("b_41_pad_0"), val = tensor([0, 0, 0, 0])]; tensor b_41_dilations_0 = const()[name = string("b_41_dilations_0"), val = tensor([1, 1])]; int32 b_41_groups_0 = const()[name = string("b_41_groups_0"), val = int32(1)]; tensor b_41 = conv(dilations = b_41_dilations_0, groups = b_41_groups_0, pad = b_41_pad_0, pad_type = b_41_pad_type_0, strides = b_41_strides_0, weight = model_model_layers_20_mlp_up_proj_weight_palettized, x = input_373)[name = string("b_41")]; tensor c_41 = silu(x = input_375)[name = string("c_41")]; tensor input_377 = mul(x = c_41, y = b_41)[name = string("input_377")]; string e_41_pad_type_0 = const()[name = string("e_41_pad_type_0"), val = string("valid")]; tensor e_41_strides_0 = const()[name = string("e_41_strides_0"), val = tensor([1, 1])]; tensor e_41_pad_0 = const()[name = string("e_41_pad_0"), val = tensor([0, 0, 0, 0])]; tensor e_41_dilations_0 = const()[name = string("e_41_dilations_0"), val = tensor([1, 1])]; int32 e_41_groups_0 = const()[name = string("e_41_groups_0"), val = int32(1)]; tensor e_41 = conv(dilations = e_41_dilations_0, groups = e_41_groups_0, pad = e_41_pad_0, pad_type = e_41_pad_type_0, strides = e_41_strides_0, weight = model_model_layers_20_mlp_down_proj_weight_palettized, x = input_377)[name = string("e_41")]; tensor var_11358_axes_0 = const()[name = string("op_11358_axes_0"), val = tensor([2])]; tensor var_11358 = squeeze(axes = var_11358_axes_0, x = e_41)[name = string("op_11358")]; tensor var_11359 = const()[name = string("op_11359"), val = tensor([0, 2, 1])]; tensor var_11360 = transpose(perm = var_11359, x = var_11358)[name = string("transpose_42")]; tensor hidden_states_127_cast_fp16 = add(x = hidden_states_125_cast_fp16, y = var_11360)[name = string("hidden_states_127_cast_fp16")]; tensor mean_169_axes_0 = const()[name = string("mean_169_axes_0"), val = tensor([-1])]; bool mean_169_keep_dims_0 = const()[name = string("mean_169_keep_dims_0"), val = bool(true)]; tensor mean_169_cast_fp16 = reduce_mean(axes = mean_169_axes_0, keep_dims = mean_169_keep_dims_0, x = hidden_states_127_cast_fp16)[name = string("mean_169_cast_fp16")]; tensor input_379_cast_fp16 = sub(x = hidden_states_127_cast_fp16, y = mean_169_cast_fp16)[name = string("input_379_cast_fp16")]; tensor var_11378_axes_0 = const()[name = string("op_11378_axes_0"), val = tensor([-1])]; tensor model_model_layers_21_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_21_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1077657152)))]; fp16 var_11366_to_fp16 = const()[name = string("op_11366_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_11378_cast_fp16 = layer_norm(axes = var_11378_axes_0, epsilon = var_11366_to_fp16, gamma = model_model_layers_21_input_layernorm_weight_to_fp16, x = input_379_cast_fp16)[name = string("op_11378_cast_fp16")]; tensor var_11384 = const()[name = string("op_11384"), val = tensor([0, 2, 1])]; tensor var_11387_axes_0 = const()[name = string("op_11387_axes_0"), val = tensor([2])]; tensor var_11385 = transpose(perm = var_11384, x = var_11378_cast_fp16)[name = string("transpose_41")]; tensor var_11387 = expand_dims(axes = var_11387_axes_0, x = var_11385)[name = string("op_11387")]; string var_11403_pad_type_0 = const()[name = string("op_11403_pad_type_0"), val = string("valid")]; tensor var_11403_strides_0 = const()[name = string("op_11403_strides_0"), val = tensor([1, 1])]; tensor var_11403_pad_0 = const()[name = string("op_11403_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_11403_dilations_0 = const()[name = string("op_11403_dilations_0"), val = tensor([1, 1])]; int32 var_11403_groups_0 = const()[name = string("op_11403_groups_0"), val = int32(1)]; tensor var_11403 = conv(dilations = var_11403_dilations_0, groups = var_11403_groups_0, pad = var_11403_pad_0, pad_type = var_11403_pad_type_0, strides = var_11403_strides_0, weight = model_model_layers_21_self_attn_q_proj_weight_palettized, x = var_11387)[name = string("op_11403")]; tensor var_11408 = const()[name = string("op_11408"), val = tensor([1, 16, 1, 128])]; tensor var_11409 = reshape(shape = var_11408, x = var_11403)[name = string("op_11409")]; string var_11425_pad_type_0 = const()[name = string("op_11425_pad_type_0"), val = string("valid")]; tensor var_11425_strides_0 = const()[name = string("op_11425_strides_0"), val = tensor([1, 1])]; tensor var_11425_pad_0 = const()[name = string("op_11425_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_11425_dilations_0 = const()[name = string("op_11425_dilations_0"), val = tensor([1, 1])]; int32 var_11425_groups_0 = const()[name = string("op_11425_groups_0"), val = int32(1)]; tensor var_11425 = conv(dilations = var_11425_dilations_0, groups = var_11425_groups_0, pad = var_11425_pad_0, pad_type = var_11425_pad_type_0, strides = var_11425_strides_0, weight = model_model_layers_21_self_attn_k_proj_weight_palettized, x = var_11387)[name = string("op_11425")]; tensor var_11430 = const()[name = string("op_11430"), val = tensor([1, 8, 1, 128])]; tensor var_11431 = reshape(shape = var_11430, x = var_11425)[name = string("op_11431")]; string var_11447_pad_type_0 = const()[name = string("op_11447_pad_type_0"), val = string("valid")]; tensor var_11447_strides_0 = const()[name = string("op_11447_strides_0"), val = tensor([1, 1])]; tensor var_11447_pad_0 = const()[name = string("op_11447_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_11447_dilations_0 = const()[name = string("op_11447_dilations_0"), val = tensor([1, 1])]; int32 var_11447_groups_0 = const()[name = string("op_11447_groups_0"), val = int32(1)]; tensor var_11447 = conv(dilations = var_11447_dilations_0, groups = var_11447_groups_0, pad = var_11447_pad_0, pad_type = var_11447_pad_type_0, strides = var_11447_strides_0, weight = model_model_layers_21_self_attn_v_proj_weight_palettized, x = var_11387)[name = string("op_11447")]; tensor var_11452 = const()[name = string("op_11452"), val = tensor([1, 8, 1, 128])]; tensor var_11453 = reshape(shape = var_11452, x = var_11447)[name = string("op_11453")]; tensor mean_171_axes_0 = const()[name = string("mean_171_axes_0"), val = tensor([-1])]; bool mean_171_keep_dims_0 = const()[name = string("mean_171_keep_dims_0"), val = bool(true)]; tensor mean_171 = reduce_mean(axes = mean_171_axes_0, keep_dims = mean_171_keep_dims_0, x = var_11409)[name = string("mean_171")]; tensor input_383 = sub(x = var_11409, y = mean_171)[name = string("input_383")]; tensor var_11474_axes_0 = const()[name = string("op_11474_axes_0"), val = tensor([-1])]; tensor model_model_layers_21_self_attn_q_norm_weight_to_fp16 = const()[name = string("model_model_layers_21_self_attn_q_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1077661312)))]; fp16 var_11462_to_fp16 = const()[name = string("op_11462_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_11474_cast_fp16 = layer_norm(axes = var_11474_axes_0, epsilon = var_11462_to_fp16, gamma = model_model_layers_21_self_attn_q_norm_weight_to_fp16, x = input_383)[name = string("op_11474_cast_fp16")]; tensor mean_173_axes_0 = const()[name = string("mean_173_axes_0"), val = tensor([-1])]; bool mean_173_keep_dims_0 = const()[name = string("mean_173_keep_dims_0"), val = bool(true)]; tensor mean_173 = reduce_mean(axes = mean_173_axes_0, keep_dims = mean_173_keep_dims_0, x = var_11431)[name = string("mean_173")]; tensor input_385 = sub(x = var_11431, y = mean_173)[name = string("input_385")]; tensor var_11492_axes_0 = const()[name = string("op_11492_axes_0"), val = tensor([-1])]; tensor model_model_layers_21_self_attn_k_norm_weight_to_fp16 = const()[name = string("model_model_layers_21_self_attn_k_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1077661632)))]; fp16 var_11480_to_fp16 = const()[name = string("op_11480_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_11492_cast_fp16 = layer_norm(axes = var_11492_axes_0, epsilon = var_11480_to_fp16, gamma = model_model_layers_21_self_attn_k_norm_weight_to_fp16, x = input_385)[name = string("op_11492_cast_fp16")]; tensor var_11495 = mul(x = var_11474_cast_fp16, y = cos_1_cast_fp16)[name = string("op_11495")]; tensor x1_85_begin_0 = const()[name = string("x1_85_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_85_end_0 = const()[name = string("x1_85_end_0"), val = tensor([1, 16, 1, 64])]; tensor x1_85_end_mask_0 = const()[name = string("x1_85_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_85 = slice_by_index(begin = x1_85_begin_0, end = x1_85_end_0, end_mask = x1_85_end_mask_0, x = var_11474_cast_fp16)[name = string("x1_85")]; tensor x2_85_begin_0 = const()[name = string("x2_85_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_85_end_0 = const()[name = string("x2_85_end_0"), val = tensor([1, 16, 1, 128])]; tensor x2_85_end_mask_0 = const()[name = string("x2_85_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_85 = slice_by_index(begin = x2_85_begin_0, end = x2_85_end_0, end_mask = x2_85_end_mask_0, x = var_11474_cast_fp16)[name = string("x2_85")]; fp16 const_383_promoted = const()[name = string("const_383_promoted"), val = fp16(-0x1p+0)]; tensor var_11516 = mul(x = x2_85, y = const_383_promoted)[name = string("op_11516")]; int32 var_11518 = const()[name = string("op_11518"), val = int32(-1)]; bool var_11519_interleave_0 = const()[name = string("op_11519_interleave_0"), val = bool(false)]; tensor var_11519 = concat(axis = var_11518, interleave = var_11519_interleave_0, values = (var_11516, x1_85))[name = string("op_11519")]; tensor var_11520 = mul(x = var_11519, y = sin_1_cast_fp16)[name = string("op_11520")]; tensor query_states_85 = add(x = var_11495, y = var_11520)[name = string("query_states_85")]; tensor var_11523 = mul(x = var_11492_cast_fp16, y = cos_1_cast_fp16)[name = string("op_11523")]; tensor x1_87_begin_0 = const()[name = string("x1_87_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_87_end_0 = const()[name = string("x1_87_end_0"), val = tensor([1, 8, 1, 64])]; tensor x1_87_end_mask_0 = const()[name = string("x1_87_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_87 = slice_by_index(begin = x1_87_begin_0, end = x1_87_end_0, end_mask = x1_87_end_mask_0, x = var_11492_cast_fp16)[name = string("x1_87")]; tensor x2_87_begin_0 = const()[name = string("x2_87_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_87_end_0 = const()[name = string("x2_87_end_0"), val = tensor([1, 8, 1, 128])]; tensor x2_87_end_mask_0 = const()[name = string("x2_87_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_87 = slice_by_index(begin = x2_87_begin_0, end = x2_87_end_0, end_mask = x2_87_end_mask_0, x = var_11492_cast_fp16)[name = string("x2_87")]; fp16 const_386_promoted = const()[name = string("const_386_promoted"), val = fp16(-0x1p+0)]; tensor var_11544 = mul(x = x2_87, y = const_386_promoted)[name = string("op_11544")]; int32 var_11546 = const()[name = string("op_11546"), val = int32(-1)]; bool var_11547_interleave_0 = const()[name = string("op_11547_interleave_0"), val = bool(false)]; tensor var_11547 = concat(axis = var_11546, interleave = var_11547_interleave_0, values = (var_11544, x1_87))[name = string("op_11547")]; tensor var_11548 = mul(x = var_11547, y = sin_1_cast_fp16)[name = string("op_11548")]; tensor key_states_85 = add(x = var_11523, y = var_11548)[name = string("key_states_85")]; tensor expand_dims_252 = const()[name = string("expand_dims_252"), val = tensor([21])]; tensor expand_dims_253 = const()[name = string("expand_dims_253"), val = tensor([0])]; tensor expand_dims_255 = const()[name = string("expand_dims_255"), val = tensor([0])]; tensor expand_dims_256 = const()[name = string("expand_dims_256"), val = tensor([22])]; int32 concat_170_axis_0 = const()[name = string("concat_170_axis_0"), val = int32(0)]; bool concat_170_interleave_0 = const()[name = string("concat_170_interleave_0"), val = bool(false)]; tensor concat_170 = concat(axis = concat_170_axis_0, interleave = concat_170_interleave_0, values = (expand_dims_252, expand_dims_253, current_pos, expand_dims_255))[name = string("concat_170")]; tensor concat_171_values1_0 = const()[name = string("concat_171_values1_0"), val = tensor([0])]; tensor concat_171_values3_0 = const()[name = string("concat_171_values3_0"), val = tensor([0])]; int32 concat_171_axis_0 = const()[name = string("concat_171_axis_0"), val = int32(0)]; bool concat_171_interleave_0 = const()[name = string("concat_171_interleave_0"), val = bool(false)]; tensor concat_171 = concat(axis = concat_171_axis_0, interleave = concat_171_interleave_0, values = (expand_dims_256, concat_171_values1_0, var_1725, concat_171_values3_0))[name = string("concat_171")]; tensor model_model_kv_cache_0_internal_tensor_assign_43_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_43_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_43_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_43_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_43_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_43_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_43_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_43_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_43_cast_fp16 = slice_update(begin = concat_170, begin_mask = model_model_kv_cache_0_internal_tensor_assign_43_begin_mask_0, end = concat_171, end_mask = model_model_kv_cache_0_internal_tensor_assign_43_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_43_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_43_stride_0, update = key_states_85, x = coreml_update_state_97)[name = string("model_model_kv_cache_0_internal_tensor_assign_43_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_43_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_42_write_state")]; tensor coreml_update_state_98 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_42")]; tensor expand_dims_258 = const()[name = string("expand_dims_258"), val = tensor([49])]; tensor expand_dims_259 = const()[name = string("expand_dims_259"), val = tensor([0])]; tensor expand_dims_261 = const()[name = string("expand_dims_261"), val = tensor([0])]; tensor expand_dims_262 = const()[name = string("expand_dims_262"), val = tensor([50])]; int32 concat_174_axis_0 = const()[name = string("concat_174_axis_0"), val = int32(0)]; bool concat_174_interleave_0 = const()[name = string("concat_174_interleave_0"), val = bool(false)]; tensor concat_174 = concat(axis = concat_174_axis_0, interleave = concat_174_interleave_0, values = (expand_dims_258, expand_dims_259, current_pos, expand_dims_261))[name = string("concat_174")]; tensor concat_175_values1_0 = const()[name = string("concat_175_values1_0"), val = tensor([0])]; tensor concat_175_values3_0 = const()[name = string("concat_175_values3_0"), val = tensor([0])]; int32 concat_175_axis_0 = const()[name = string("concat_175_axis_0"), val = int32(0)]; bool concat_175_interleave_0 = const()[name = string("concat_175_interleave_0"), val = bool(false)]; tensor concat_175 = concat(axis = concat_175_axis_0, interleave = concat_175_interleave_0, values = (expand_dims_262, concat_175_values1_0, var_1725, concat_175_values3_0))[name = string("concat_175")]; tensor model_model_kv_cache_0_internal_tensor_assign_44_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_44_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_44_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_44_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_44_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_44_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_44_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_44_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_44_cast_fp16 = slice_update(begin = concat_174, begin_mask = model_model_kv_cache_0_internal_tensor_assign_44_begin_mask_0, end = concat_175, end_mask = model_model_kv_cache_0_internal_tensor_assign_44_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_44_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_44_stride_0, update = var_11453, x = coreml_update_state_98)[name = string("model_model_kv_cache_0_internal_tensor_assign_44_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_44_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_43_write_state")]; tensor coreml_update_state_99 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_43")]; tensor var_11603_begin_0 = const()[name = string("op_11603_begin_0"), val = tensor([21, 0, 0, 0])]; tensor var_11603_end_0 = const()[name = string("op_11603_end_0"), val = tensor([22, 8, 1024, 128])]; tensor var_11603_end_mask_0 = const()[name = string("op_11603_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_11603_cast_fp16 = slice_by_index(begin = var_11603_begin_0, end = var_11603_end_0, end_mask = var_11603_end_mask_0, x = coreml_update_state_99)[name = string("op_11603_cast_fp16")]; tensor K_layer_cache_43_axes_0 = const()[name = string("K_layer_cache_43_axes_0"), val = tensor([0])]; tensor K_layer_cache_43_cast_fp16 = squeeze(axes = K_layer_cache_43_axes_0, x = var_11603_cast_fp16)[name = string("K_layer_cache_43_cast_fp16")]; tensor var_11610_begin_0 = const()[name = string("op_11610_begin_0"), val = tensor([49, 0, 0, 0])]; tensor var_11610_end_0 = const()[name = string("op_11610_end_0"), val = tensor([50, 8, 1024, 128])]; tensor var_11610_end_mask_0 = const()[name = string("op_11610_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_11610_cast_fp16 = slice_by_index(begin = var_11610_begin_0, end = var_11610_end_0, end_mask = var_11610_end_mask_0, x = coreml_update_state_99)[name = string("op_11610_cast_fp16")]; tensor V_layer_cache_43_axes_0 = const()[name = string("V_layer_cache_43_axes_0"), val = tensor([0])]; tensor V_layer_cache_43_cast_fp16 = squeeze(axes = V_layer_cache_43_axes_0, x = var_11610_cast_fp16)[name = string("V_layer_cache_43_cast_fp16")]; tensor x_427_axes_0 = const()[name = string("x_427_axes_0"), val = tensor([1])]; tensor x_427_cast_fp16 = expand_dims(axes = x_427_axes_0, x = K_layer_cache_43_cast_fp16)[name = string("x_427_cast_fp16")]; tensor var_11647 = const()[name = string("op_11647"), val = tensor([1, 2, 1, 1])]; tensor x_429_cast_fp16 = tile(reps = var_11647, x = x_427_cast_fp16)[name = string("x_429_cast_fp16")]; tensor var_11659 = const()[name = string("op_11659"), val = tensor([1, -1, 1024, 128])]; tensor key_states_87_cast_fp16 = reshape(shape = var_11659, x = x_429_cast_fp16)[name = string("key_states_87_cast_fp16")]; tensor x_433_axes_0 = const()[name = string("x_433_axes_0"), val = tensor([1])]; tensor x_433_cast_fp16 = expand_dims(axes = x_433_axes_0, x = V_layer_cache_43_cast_fp16)[name = string("x_433_cast_fp16")]; tensor var_11667 = const()[name = string("op_11667"), val = tensor([1, 2, 1, 1])]; tensor x_435_cast_fp16 = tile(reps = var_11667, x = x_433_cast_fp16)[name = string("x_435_cast_fp16")]; tensor var_11679 = const()[name = string("op_11679"), val = tensor([1, -1, 1024, 128])]; tensor value_states_129_cast_fp16 = reshape(shape = var_11679, x = x_435_cast_fp16)[name = string("value_states_129_cast_fp16")]; bool var_11694_transpose_x_1 = const()[name = string("op_11694_transpose_x_1"), val = bool(false)]; bool var_11694_transpose_y_1 = const()[name = string("op_11694_transpose_y_1"), val = bool(true)]; tensor var_11694 = matmul(transpose_x = var_11694_transpose_x_1, transpose_y = var_11694_transpose_y_1, x = query_states_85, y = key_states_87_cast_fp16)[name = string("op_11694")]; fp16 var_11695_to_fp16 = const()[name = string("op_11695_to_fp16"), val = fp16(0x1.6ap-4)]; tensor attn_weights_127_cast_fp16 = mul(x = var_11694, y = var_11695_to_fp16)[name = string("attn_weights_127_cast_fp16")]; tensor attn_weights_129_cast_fp16 = add(x = attn_weights_127_cast_fp16, y = causal_mask)[name = string("attn_weights_129_cast_fp16")]; int32 var_11730 = const()[name = string("op_11730"), val = int32(-1)]; tensor attn_weights_131_cast_fp16 = softmax(axis = var_11730, x = attn_weights_129_cast_fp16)[name = string("attn_weights_131_cast_fp16")]; bool attn_output_211_transpose_x_0 = const()[name = string("attn_output_211_transpose_x_0"), val = bool(false)]; bool attn_output_211_transpose_y_0 = const()[name = string("attn_output_211_transpose_y_0"), val = bool(false)]; tensor attn_output_211_cast_fp16 = matmul(transpose_x = attn_output_211_transpose_x_0, transpose_y = attn_output_211_transpose_y_0, x = attn_weights_131_cast_fp16, y = value_states_129_cast_fp16)[name = string("attn_output_211_cast_fp16")]; tensor var_11741_perm_0 = const()[name = string("op_11741_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_11745 = const()[name = string("op_11745"), val = tensor([1, 1, 2048])]; tensor var_11741_cast_fp16 = transpose(perm = var_11741_perm_0, x = attn_output_211_cast_fp16)[name = string("transpose_40")]; tensor attn_output_215_cast_fp16 = reshape(shape = var_11745, x = var_11741_cast_fp16)[name = string("attn_output_215_cast_fp16")]; tensor var_11750 = const()[name = string("op_11750"), val = tensor([0, 2, 1])]; string var_11766_pad_type_0 = const()[name = string("op_11766_pad_type_0"), val = string("valid")]; int32 var_11766_groups_0 = const()[name = string("op_11766_groups_0"), val = int32(1)]; tensor var_11766_strides_0 = const()[name = string("op_11766_strides_0"), val = tensor([1])]; tensor var_11766_pad_0 = const()[name = string("op_11766_pad_0"), val = tensor([0, 0])]; tensor var_11766_dilations_0 = const()[name = string("op_11766_dilations_0"), val = tensor([1])]; tensor squeeze_21_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1077661952))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1080807744))))[name = string("squeeze_21_cast_fp16_to_fp32_to_fp16_palettized")]; tensor var_11751_cast_fp16 = transpose(perm = var_11750, x = attn_output_215_cast_fp16)[name = string("transpose_39")]; tensor var_11766_cast_fp16 = conv(dilations = var_11766_dilations_0, groups = var_11766_groups_0, pad = var_11766_pad_0, pad_type = var_11766_pad_type_0, strides = var_11766_strides_0, weight = squeeze_21_cast_fp16_to_fp32_to_fp16_palettized, x = var_11751_cast_fp16)[name = string("op_11766_cast_fp16")]; tensor var_11770 = const()[name = string("op_11770"), val = tensor([0, 2, 1])]; tensor attn_output_219_cast_fp16 = transpose(perm = var_11770, x = var_11766_cast_fp16)[name = string("transpose_38")]; tensor hidden_states_131_cast_fp16 = add(x = hidden_states_127_cast_fp16, y = attn_output_219_cast_fp16)[name = string("hidden_states_131_cast_fp16")]; tensor mean_175_axes_0 = const()[name = string("mean_175_axes_0"), val = tensor([-1])]; bool mean_175_keep_dims_0 = const()[name = string("mean_175_keep_dims_0"), val = bool(true)]; tensor mean_175_cast_fp16 = reduce_mean(axes = mean_175_axes_0, keep_dims = mean_175_keep_dims_0, x = hidden_states_131_cast_fp16)[name = string("mean_175_cast_fp16")]; tensor input_389_cast_fp16 = sub(x = hidden_states_131_cast_fp16, y = mean_175_cast_fp16)[name = string("input_389_cast_fp16")]; tensor var_11789_axes_0 = const()[name = string("op_11789_axes_0"), val = tensor([-1])]; tensor model_model_layers_21_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_21_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1080840576)))]; fp16 var_11777_to_fp16 = const()[name = string("op_11777_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_11789_cast_fp16 = layer_norm(axes = var_11789_axes_0, epsilon = var_11777_to_fp16, gamma = model_model_layers_21_post_attention_layernorm_weight_to_fp16, x = input_389_cast_fp16)[name = string("op_11789_cast_fp16")]; tensor var_11803 = const()[name = string("op_11803"), val = tensor([0, 2, 1])]; tensor input_391_axes_0 = const()[name = string("input_391_axes_0"), val = tensor([2])]; tensor var_11804 = transpose(perm = var_11803, x = var_11789_cast_fp16)[name = string("transpose_37")]; tensor input_391 = expand_dims(axes = input_391_axes_0, x = var_11804)[name = string("input_391")]; string input_393_pad_type_0 = const()[name = string("input_393_pad_type_0"), val = string("valid")]; tensor input_393_strides_0 = const()[name = string("input_393_strides_0"), val = tensor([1, 1])]; tensor input_393_pad_0 = const()[name = string("input_393_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_393_dilations_0 = const()[name = string("input_393_dilations_0"), val = tensor([1, 1])]; int32 input_393_groups_0 = const()[name = string("input_393_groups_0"), val = int32(1)]; tensor input_393 = conv(dilations = input_393_dilations_0, groups = input_393_groups_0, pad = input_393_pad_0, pad_type = input_393_pad_type_0, strides = input_393_strides_0, weight = model_model_layers_21_mlp_gate_proj_weight_palettized, x = input_391)[name = string("input_393")]; string b_43_pad_type_0 = const()[name = string("b_43_pad_type_0"), val = string("valid")]; tensor b_43_strides_0 = const()[name = string("b_43_strides_0"), val = tensor([1, 1])]; tensor b_43_pad_0 = const()[name = string("b_43_pad_0"), val = tensor([0, 0, 0, 0])]; tensor b_43_dilations_0 = const()[name = string("b_43_dilations_0"), val = tensor([1, 1])]; int32 b_43_groups_0 = const()[name = string("b_43_groups_0"), val = int32(1)]; tensor b_43 = conv(dilations = b_43_dilations_0, groups = b_43_groups_0, pad = b_43_pad_0, pad_type = b_43_pad_type_0, strides = b_43_strides_0, weight = model_model_layers_21_mlp_up_proj_weight_palettized, x = input_391)[name = string("b_43")]; tensor c_43 = silu(x = input_393)[name = string("c_43")]; tensor input_395 = mul(x = c_43, y = b_43)[name = string("input_395")]; string e_43_pad_type_0 = const()[name = string("e_43_pad_type_0"), val = string("valid")]; tensor e_43_strides_0 = const()[name = string("e_43_strides_0"), val = tensor([1, 1])]; tensor e_43_pad_0 = const()[name = string("e_43_pad_0"), val = tensor([0, 0, 0, 0])]; tensor e_43_dilations_0 = const()[name = string("e_43_dilations_0"), val = tensor([1, 1])]; int32 e_43_groups_0 = const()[name = string("e_43_groups_0"), val = int32(1)]; tensor e_43 = conv(dilations = e_43_dilations_0, groups = e_43_groups_0, pad = e_43_pad_0, pad_type = e_43_pad_type_0, strides = e_43_strides_0, weight = model_model_layers_21_mlp_down_proj_weight_palettized, x = input_395)[name = string("e_43")]; tensor var_11826_axes_0 = const()[name = string("op_11826_axes_0"), val = tensor([2])]; tensor var_11826 = squeeze(axes = var_11826_axes_0, x = e_43)[name = string("op_11826")]; tensor var_11827 = const()[name = string("op_11827"), val = tensor([0, 2, 1])]; tensor var_11828 = transpose(perm = var_11827, x = var_11826)[name = string("transpose_36")]; tensor hidden_states_133_cast_fp16 = add(x = hidden_states_131_cast_fp16, y = var_11828)[name = string("hidden_states_133_cast_fp16")]; tensor mean_177_axes_0 = const()[name = string("mean_177_axes_0"), val = tensor([-1])]; bool mean_177_keep_dims_0 = const()[name = string("mean_177_keep_dims_0"), val = bool(true)]; tensor mean_177_cast_fp16 = reduce_mean(axes = mean_177_axes_0, keep_dims = mean_177_keep_dims_0, x = hidden_states_133_cast_fp16)[name = string("mean_177_cast_fp16")]; tensor input_397_cast_fp16 = sub(x = hidden_states_133_cast_fp16, y = mean_177_cast_fp16)[name = string("input_397_cast_fp16")]; tensor var_11846_axes_0 = const()[name = string("op_11846_axes_0"), val = tensor([-1])]; tensor model_model_layers_22_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_22_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1080844736)))]; fp16 var_11834_to_fp16 = const()[name = string("op_11834_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_11846_cast_fp16 = layer_norm(axes = var_11846_axes_0, epsilon = var_11834_to_fp16, gamma = model_model_layers_22_input_layernorm_weight_to_fp16, x = input_397_cast_fp16)[name = string("op_11846_cast_fp16")]; tensor var_11852 = const()[name = string("op_11852"), val = tensor([0, 2, 1])]; tensor var_11855_axes_0 = const()[name = string("op_11855_axes_0"), val = tensor([2])]; tensor var_11853 = transpose(perm = var_11852, x = var_11846_cast_fp16)[name = string("transpose_35")]; tensor var_11855 = expand_dims(axes = var_11855_axes_0, x = var_11853)[name = string("op_11855")]; string var_11871_pad_type_0 = const()[name = string("op_11871_pad_type_0"), val = string("valid")]; tensor var_11871_strides_0 = const()[name = string("op_11871_strides_0"), val = tensor([1, 1])]; tensor var_11871_pad_0 = const()[name = string("op_11871_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_11871_dilations_0 = const()[name = string("op_11871_dilations_0"), val = tensor([1, 1])]; int32 var_11871_groups_0 = const()[name = string("op_11871_groups_0"), val = int32(1)]; tensor var_11871 = conv(dilations = var_11871_dilations_0, groups = var_11871_groups_0, pad = var_11871_pad_0, pad_type = var_11871_pad_type_0, strides = var_11871_strides_0, weight = model_model_layers_22_self_attn_q_proj_weight_palettized, x = var_11855)[name = string("op_11871")]; tensor var_11876 = const()[name = string("op_11876"), val = tensor([1, 16, 1, 128])]; tensor var_11877 = reshape(shape = var_11876, x = var_11871)[name = string("op_11877")]; string var_11893_pad_type_0 = const()[name = string("op_11893_pad_type_0"), val = string("valid")]; tensor var_11893_strides_0 = const()[name = string("op_11893_strides_0"), val = tensor([1, 1])]; tensor var_11893_pad_0 = const()[name = string("op_11893_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_11893_dilations_0 = const()[name = string("op_11893_dilations_0"), val = tensor([1, 1])]; int32 var_11893_groups_0 = const()[name = string("op_11893_groups_0"), val = int32(1)]; tensor var_11893 = conv(dilations = var_11893_dilations_0, groups = var_11893_groups_0, pad = var_11893_pad_0, pad_type = var_11893_pad_type_0, strides = var_11893_strides_0, weight = model_model_layers_22_self_attn_k_proj_weight_palettized, x = var_11855)[name = string("op_11893")]; tensor var_11898 = const()[name = string("op_11898"), val = tensor([1, 8, 1, 128])]; tensor var_11899 = reshape(shape = var_11898, x = var_11893)[name = string("op_11899")]; string var_11915_pad_type_0 = const()[name = string("op_11915_pad_type_0"), val = string("valid")]; tensor var_11915_strides_0 = const()[name = string("op_11915_strides_0"), val = tensor([1, 1])]; tensor var_11915_pad_0 = const()[name = string("op_11915_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_11915_dilations_0 = const()[name = string("op_11915_dilations_0"), val = tensor([1, 1])]; int32 var_11915_groups_0 = const()[name = string("op_11915_groups_0"), val = int32(1)]; tensor var_11915 = conv(dilations = var_11915_dilations_0, groups = var_11915_groups_0, pad = var_11915_pad_0, pad_type = var_11915_pad_type_0, strides = var_11915_strides_0, weight = model_model_layers_22_self_attn_v_proj_weight_palettized, x = var_11855)[name = string("op_11915")]; tensor var_11920 = const()[name = string("op_11920"), val = tensor([1, 8, 1, 128])]; tensor var_11921 = reshape(shape = var_11920, x = var_11915)[name = string("op_11921")]; tensor mean_179_axes_0 = const()[name = string("mean_179_axes_0"), val = tensor([-1])]; bool mean_179_keep_dims_0 = const()[name = string("mean_179_keep_dims_0"), val = bool(true)]; tensor mean_179 = reduce_mean(axes = mean_179_axes_0, keep_dims = mean_179_keep_dims_0, x = var_11877)[name = string("mean_179")]; tensor input_401 = sub(x = var_11877, y = mean_179)[name = string("input_401")]; tensor var_11942_axes_0 = const()[name = string("op_11942_axes_0"), val = tensor([-1])]; tensor model_model_layers_22_self_attn_q_norm_weight_to_fp16 = const()[name = string("model_model_layers_22_self_attn_q_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1080848896)))]; fp16 var_11930_to_fp16 = const()[name = string("op_11930_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_11942_cast_fp16 = layer_norm(axes = var_11942_axes_0, epsilon = var_11930_to_fp16, gamma = model_model_layers_22_self_attn_q_norm_weight_to_fp16, x = input_401)[name = string("op_11942_cast_fp16")]; tensor mean_181_axes_0 = const()[name = string("mean_181_axes_0"), val = tensor([-1])]; bool mean_181_keep_dims_0 = const()[name = string("mean_181_keep_dims_0"), val = bool(true)]; tensor mean_181 = reduce_mean(axes = mean_181_axes_0, keep_dims = mean_181_keep_dims_0, x = var_11899)[name = string("mean_181")]; tensor input_403 = sub(x = var_11899, y = mean_181)[name = string("input_403")]; tensor var_11960_axes_0 = const()[name = string("op_11960_axes_0"), val = tensor([-1])]; tensor model_model_layers_22_self_attn_k_norm_weight_to_fp16 = const()[name = string("model_model_layers_22_self_attn_k_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1080849216)))]; fp16 var_11948_to_fp16 = const()[name = string("op_11948_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_11960_cast_fp16 = layer_norm(axes = var_11960_axes_0, epsilon = var_11948_to_fp16, gamma = model_model_layers_22_self_attn_k_norm_weight_to_fp16, x = input_403)[name = string("op_11960_cast_fp16")]; tensor var_11963 = mul(x = var_11942_cast_fp16, y = cos_1_cast_fp16)[name = string("op_11963")]; tensor x1_89_begin_0 = const()[name = string("x1_89_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_89_end_0 = const()[name = string("x1_89_end_0"), val = tensor([1, 16, 1, 64])]; tensor x1_89_end_mask_0 = const()[name = string("x1_89_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_89 = slice_by_index(begin = x1_89_begin_0, end = x1_89_end_0, end_mask = x1_89_end_mask_0, x = var_11942_cast_fp16)[name = string("x1_89")]; tensor x2_89_begin_0 = const()[name = string("x2_89_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_89_end_0 = const()[name = string("x2_89_end_0"), val = tensor([1, 16, 1, 128])]; tensor x2_89_end_mask_0 = const()[name = string("x2_89_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_89 = slice_by_index(begin = x2_89_begin_0, end = x2_89_end_0, end_mask = x2_89_end_mask_0, x = var_11942_cast_fp16)[name = string("x2_89")]; fp16 const_401_promoted = const()[name = string("const_401_promoted"), val = fp16(-0x1p+0)]; tensor var_11984 = mul(x = x2_89, y = const_401_promoted)[name = string("op_11984")]; int32 var_11986 = const()[name = string("op_11986"), val = int32(-1)]; bool var_11987_interleave_0 = const()[name = string("op_11987_interleave_0"), val = bool(false)]; tensor var_11987 = concat(axis = var_11986, interleave = var_11987_interleave_0, values = (var_11984, x1_89))[name = string("op_11987")]; tensor var_11988 = mul(x = var_11987, y = sin_1_cast_fp16)[name = string("op_11988")]; tensor query_states_89 = add(x = var_11963, y = var_11988)[name = string("query_states_89")]; tensor var_11991 = mul(x = var_11960_cast_fp16, y = cos_1_cast_fp16)[name = string("op_11991")]; tensor x1_91_begin_0 = const()[name = string("x1_91_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_91_end_0 = const()[name = string("x1_91_end_0"), val = tensor([1, 8, 1, 64])]; tensor x1_91_end_mask_0 = const()[name = string("x1_91_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_91 = slice_by_index(begin = x1_91_begin_0, end = x1_91_end_0, end_mask = x1_91_end_mask_0, x = var_11960_cast_fp16)[name = string("x1_91")]; tensor x2_91_begin_0 = const()[name = string("x2_91_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_91_end_0 = const()[name = string("x2_91_end_0"), val = tensor([1, 8, 1, 128])]; tensor x2_91_end_mask_0 = const()[name = string("x2_91_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_91 = slice_by_index(begin = x2_91_begin_0, end = x2_91_end_0, end_mask = x2_91_end_mask_0, x = var_11960_cast_fp16)[name = string("x2_91")]; fp16 const_404_promoted = const()[name = string("const_404_promoted"), val = fp16(-0x1p+0)]; tensor var_12012 = mul(x = x2_91, y = const_404_promoted)[name = string("op_12012")]; int32 var_12014 = const()[name = string("op_12014"), val = int32(-1)]; bool var_12015_interleave_0 = const()[name = string("op_12015_interleave_0"), val = bool(false)]; tensor var_12015 = concat(axis = var_12014, interleave = var_12015_interleave_0, values = (var_12012, x1_91))[name = string("op_12015")]; tensor var_12016 = mul(x = var_12015, y = sin_1_cast_fp16)[name = string("op_12016")]; tensor key_states_89 = add(x = var_11991, y = var_12016)[name = string("key_states_89")]; tensor expand_dims_264 = const()[name = string("expand_dims_264"), val = tensor([22])]; tensor expand_dims_265 = const()[name = string("expand_dims_265"), val = tensor([0])]; tensor expand_dims_267 = const()[name = string("expand_dims_267"), val = tensor([0])]; tensor expand_dims_268 = const()[name = string("expand_dims_268"), val = tensor([23])]; int32 concat_178_axis_0 = const()[name = string("concat_178_axis_0"), val = int32(0)]; bool concat_178_interleave_0 = const()[name = string("concat_178_interleave_0"), val = bool(false)]; tensor concat_178 = concat(axis = concat_178_axis_0, interleave = concat_178_interleave_0, values = (expand_dims_264, expand_dims_265, current_pos, expand_dims_267))[name = string("concat_178")]; tensor concat_179_values1_0 = const()[name = string("concat_179_values1_0"), val = tensor([0])]; tensor concat_179_values3_0 = const()[name = string("concat_179_values3_0"), val = tensor([0])]; int32 concat_179_axis_0 = const()[name = string("concat_179_axis_0"), val = int32(0)]; bool concat_179_interleave_0 = const()[name = string("concat_179_interleave_0"), val = bool(false)]; tensor concat_179 = concat(axis = concat_179_axis_0, interleave = concat_179_interleave_0, values = (expand_dims_268, concat_179_values1_0, var_1725, concat_179_values3_0))[name = string("concat_179")]; tensor model_model_kv_cache_0_internal_tensor_assign_45_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_45_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_45_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_45_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_45_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_45_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_45_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_45_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_45_cast_fp16 = slice_update(begin = concat_178, begin_mask = model_model_kv_cache_0_internal_tensor_assign_45_begin_mask_0, end = concat_179, end_mask = model_model_kv_cache_0_internal_tensor_assign_45_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_45_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_45_stride_0, update = key_states_89, x = coreml_update_state_99)[name = string("model_model_kv_cache_0_internal_tensor_assign_45_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_45_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_44_write_state")]; tensor coreml_update_state_100 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_44")]; tensor expand_dims_270 = const()[name = string("expand_dims_270"), val = tensor([50])]; tensor expand_dims_271 = const()[name = string("expand_dims_271"), val = tensor([0])]; tensor expand_dims_273 = const()[name = string("expand_dims_273"), val = tensor([0])]; tensor expand_dims_274 = const()[name = string("expand_dims_274"), val = tensor([51])]; int32 concat_182_axis_0 = const()[name = string("concat_182_axis_0"), val = int32(0)]; bool concat_182_interleave_0 = const()[name = string("concat_182_interleave_0"), val = bool(false)]; tensor concat_182 = concat(axis = concat_182_axis_0, interleave = concat_182_interleave_0, values = (expand_dims_270, expand_dims_271, current_pos, expand_dims_273))[name = string("concat_182")]; tensor concat_183_values1_0 = const()[name = string("concat_183_values1_0"), val = tensor([0])]; tensor concat_183_values3_0 = const()[name = string("concat_183_values3_0"), val = tensor([0])]; int32 concat_183_axis_0 = const()[name = string("concat_183_axis_0"), val = int32(0)]; bool concat_183_interleave_0 = const()[name = string("concat_183_interleave_0"), val = bool(false)]; tensor concat_183 = concat(axis = concat_183_axis_0, interleave = concat_183_interleave_0, values = (expand_dims_274, concat_183_values1_0, var_1725, concat_183_values3_0))[name = string("concat_183")]; tensor model_model_kv_cache_0_internal_tensor_assign_46_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_46_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_46_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_46_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_46_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_46_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_46_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_46_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_46_cast_fp16 = slice_update(begin = concat_182, begin_mask = model_model_kv_cache_0_internal_tensor_assign_46_begin_mask_0, end = concat_183, end_mask = model_model_kv_cache_0_internal_tensor_assign_46_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_46_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_46_stride_0, update = var_11921, x = coreml_update_state_100)[name = string("model_model_kv_cache_0_internal_tensor_assign_46_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_46_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_45_write_state")]; tensor coreml_update_state_101 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_45")]; tensor var_12071_begin_0 = const()[name = string("op_12071_begin_0"), val = tensor([22, 0, 0, 0])]; tensor var_12071_end_0 = const()[name = string("op_12071_end_0"), val = tensor([23, 8, 1024, 128])]; tensor var_12071_end_mask_0 = const()[name = string("op_12071_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_12071_cast_fp16 = slice_by_index(begin = var_12071_begin_0, end = var_12071_end_0, end_mask = var_12071_end_mask_0, x = coreml_update_state_101)[name = string("op_12071_cast_fp16")]; tensor K_layer_cache_45_axes_0 = const()[name = string("K_layer_cache_45_axes_0"), val = tensor([0])]; tensor K_layer_cache_45_cast_fp16 = squeeze(axes = K_layer_cache_45_axes_0, x = var_12071_cast_fp16)[name = string("K_layer_cache_45_cast_fp16")]; tensor var_12078_begin_0 = const()[name = string("op_12078_begin_0"), val = tensor([50, 0, 0, 0])]; tensor var_12078_end_0 = const()[name = string("op_12078_end_0"), val = tensor([51, 8, 1024, 128])]; tensor var_12078_end_mask_0 = const()[name = string("op_12078_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_12078_cast_fp16 = slice_by_index(begin = var_12078_begin_0, end = var_12078_end_0, end_mask = var_12078_end_mask_0, x = coreml_update_state_101)[name = string("op_12078_cast_fp16")]; tensor V_layer_cache_45_axes_0 = const()[name = string("V_layer_cache_45_axes_0"), val = tensor([0])]; tensor V_layer_cache_45_cast_fp16 = squeeze(axes = V_layer_cache_45_axes_0, x = var_12078_cast_fp16)[name = string("V_layer_cache_45_cast_fp16")]; tensor x_447_axes_0 = const()[name = string("x_447_axes_0"), val = tensor([1])]; tensor x_447_cast_fp16 = expand_dims(axes = x_447_axes_0, x = K_layer_cache_45_cast_fp16)[name = string("x_447_cast_fp16")]; tensor var_12115 = const()[name = string("op_12115"), val = tensor([1, 2, 1, 1])]; tensor x_449_cast_fp16 = tile(reps = var_12115, x = x_447_cast_fp16)[name = string("x_449_cast_fp16")]; tensor var_12127 = const()[name = string("op_12127"), val = tensor([1, -1, 1024, 128])]; tensor key_states_91_cast_fp16 = reshape(shape = var_12127, x = x_449_cast_fp16)[name = string("key_states_91_cast_fp16")]; tensor x_453_axes_0 = const()[name = string("x_453_axes_0"), val = tensor([1])]; tensor x_453_cast_fp16 = expand_dims(axes = x_453_axes_0, x = V_layer_cache_45_cast_fp16)[name = string("x_453_cast_fp16")]; tensor var_12135 = const()[name = string("op_12135"), val = tensor([1, 2, 1, 1])]; tensor x_455_cast_fp16 = tile(reps = var_12135, x = x_453_cast_fp16)[name = string("x_455_cast_fp16")]; tensor var_12147 = const()[name = string("op_12147"), val = tensor([1, -1, 1024, 128])]; tensor value_states_135_cast_fp16 = reshape(shape = var_12147, x = x_455_cast_fp16)[name = string("value_states_135_cast_fp16")]; bool var_12162_transpose_x_1 = const()[name = string("op_12162_transpose_x_1"), val = bool(false)]; bool var_12162_transpose_y_1 = const()[name = string("op_12162_transpose_y_1"), val = bool(true)]; tensor var_12162 = matmul(transpose_x = var_12162_transpose_x_1, transpose_y = var_12162_transpose_y_1, x = query_states_89, y = key_states_91_cast_fp16)[name = string("op_12162")]; fp16 var_12163_to_fp16 = const()[name = string("op_12163_to_fp16"), val = fp16(0x1.6ap-4)]; tensor attn_weights_133_cast_fp16 = mul(x = var_12162, y = var_12163_to_fp16)[name = string("attn_weights_133_cast_fp16")]; tensor attn_weights_135_cast_fp16 = add(x = attn_weights_133_cast_fp16, y = causal_mask)[name = string("attn_weights_135_cast_fp16")]; int32 var_12198 = const()[name = string("op_12198"), val = int32(-1)]; tensor attn_weights_137_cast_fp16 = softmax(axis = var_12198, x = attn_weights_135_cast_fp16)[name = string("attn_weights_137_cast_fp16")]; bool attn_output_221_transpose_x_0 = const()[name = string("attn_output_221_transpose_x_0"), val = bool(false)]; bool attn_output_221_transpose_y_0 = const()[name = string("attn_output_221_transpose_y_0"), val = bool(false)]; tensor attn_output_221_cast_fp16 = matmul(transpose_x = attn_output_221_transpose_x_0, transpose_y = attn_output_221_transpose_y_0, x = attn_weights_137_cast_fp16, y = value_states_135_cast_fp16)[name = string("attn_output_221_cast_fp16")]; tensor var_12209_perm_0 = const()[name = string("op_12209_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_12213 = const()[name = string("op_12213"), val = tensor([1, 1, 2048])]; tensor var_12209_cast_fp16 = transpose(perm = var_12209_perm_0, x = attn_output_221_cast_fp16)[name = string("transpose_34")]; tensor attn_output_225_cast_fp16 = reshape(shape = var_12213, x = var_12209_cast_fp16)[name = string("attn_output_225_cast_fp16")]; tensor var_12218 = const()[name = string("op_12218"), val = tensor([0, 2, 1])]; string var_12234_pad_type_0 = const()[name = string("op_12234_pad_type_0"), val = string("valid")]; int32 var_12234_groups_0 = const()[name = string("op_12234_groups_0"), val = int32(1)]; tensor var_12234_strides_0 = const()[name = string("op_12234_strides_0"), val = tensor([1])]; tensor var_12234_pad_0 = const()[name = string("op_12234_pad_0"), val = tensor([0, 0])]; tensor var_12234_dilations_0 = const()[name = string("op_12234_dilations_0"), val = tensor([1])]; tensor squeeze_22_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1080849536))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1083995328))))[name = string("squeeze_22_cast_fp16_to_fp32_to_fp16_palettized")]; tensor var_12219_cast_fp16 = transpose(perm = var_12218, x = attn_output_225_cast_fp16)[name = string("transpose_33")]; tensor var_12234_cast_fp16 = conv(dilations = var_12234_dilations_0, groups = var_12234_groups_0, pad = var_12234_pad_0, pad_type = var_12234_pad_type_0, strides = var_12234_strides_0, weight = squeeze_22_cast_fp16_to_fp32_to_fp16_palettized, x = var_12219_cast_fp16)[name = string("op_12234_cast_fp16")]; tensor var_12238 = const()[name = string("op_12238"), val = tensor([0, 2, 1])]; tensor attn_output_229_cast_fp16 = transpose(perm = var_12238, x = var_12234_cast_fp16)[name = string("transpose_32")]; tensor hidden_states_137_cast_fp16 = add(x = hidden_states_133_cast_fp16, y = attn_output_229_cast_fp16)[name = string("hidden_states_137_cast_fp16")]; tensor mean_183_axes_0 = const()[name = string("mean_183_axes_0"), val = tensor([-1])]; bool mean_183_keep_dims_0 = const()[name = string("mean_183_keep_dims_0"), val = bool(true)]; tensor mean_183_cast_fp16 = reduce_mean(axes = mean_183_axes_0, keep_dims = mean_183_keep_dims_0, x = hidden_states_137_cast_fp16)[name = string("mean_183_cast_fp16")]; tensor input_407_cast_fp16 = sub(x = hidden_states_137_cast_fp16, y = mean_183_cast_fp16)[name = string("input_407_cast_fp16")]; tensor var_12257_axes_0 = const()[name = string("op_12257_axes_0"), val = tensor([-1])]; tensor model_model_layers_22_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_22_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1084028160)))]; fp16 var_12245_to_fp16 = const()[name = string("op_12245_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_12257_cast_fp16 = layer_norm(axes = var_12257_axes_0, epsilon = var_12245_to_fp16, gamma = model_model_layers_22_post_attention_layernorm_weight_to_fp16, x = input_407_cast_fp16)[name = string("op_12257_cast_fp16")]; tensor var_12271 = const()[name = string("op_12271"), val = tensor([0, 2, 1])]; tensor input_409_axes_0 = const()[name = string("input_409_axes_0"), val = tensor([2])]; tensor var_12272 = transpose(perm = var_12271, x = var_12257_cast_fp16)[name = string("transpose_31")]; tensor input_409 = expand_dims(axes = input_409_axes_0, x = var_12272)[name = string("input_409")]; string input_411_pad_type_0 = const()[name = string("input_411_pad_type_0"), val = string("valid")]; tensor input_411_strides_0 = const()[name = string("input_411_strides_0"), val = tensor([1, 1])]; tensor input_411_pad_0 = const()[name = string("input_411_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_411_dilations_0 = const()[name = string("input_411_dilations_0"), val = tensor([1, 1])]; int32 input_411_groups_0 = const()[name = string("input_411_groups_0"), val = int32(1)]; tensor input_411 = conv(dilations = input_411_dilations_0, groups = input_411_groups_0, pad = input_411_pad_0, pad_type = input_411_pad_type_0, strides = input_411_strides_0, weight = model_model_layers_22_mlp_gate_proj_weight_palettized, x = input_409)[name = string("input_411")]; string b_45_pad_type_0 = const()[name = string("b_45_pad_type_0"), val = string("valid")]; tensor b_45_strides_0 = const()[name = string("b_45_strides_0"), val = tensor([1, 1])]; tensor b_45_pad_0 = const()[name = string("b_45_pad_0"), val = tensor([0, 0, 0, 0])]; tensor b_45_dilations_0 = const()[name = string("b_45_dilations_0"), val = tensor([1, 1])]; int32 b_45_groups_0 = const()[name = string("b_45_groups_0"), val = int32(1)]; tensor b_45 = conv(dilations = b_45_dilations_0, groups = b_45_groups_0, pad = b_45_pad_0, pad_type = b_45_pad_type_0, strides = b_45_strides_0, weight = model_model_layers_22_mlp_up_proj_weight_palettized, x = input_409)[name = string("b_45")]; tensor c_45 = silu(x = input_411)[name = string("c_45")]; tensor input_413 = mul(x = c_45, y = b_45)[name = string("input_413")]; string e_45_pad_type_0 = const()[name = string("e_45_pad_type_0"), val = string("valid")]; tensor e_45_strides_0 = const()[name = string("e_45_strides_0"), val = tensor([1, 1])]; tensor e_45_pad_0 = const()[name = string("e_45_pad_0"), val = tensor([0, 0, 0, 0])]; tensor e_45_dilations_0 = const()[name = string("e_45_dilations_0"), val = tensor([1, 1])]; int32 e_45_groups_0 = const()[name = string("e_45_groups_0"), val = int32(1)]; tensor e_45 = conv(dilations = e_45_dilations_0, groups = e_45_groups_0, pad = e_45_pad_0, pad_type = e_45_pad_type_0, strides = e_45_strides_0, weight = model_model_layers_22_mlp_down_proj_weight_palettized, x = input_413)[name = string("e_45")]; tensor var_12294_axes_0 = const()[name = string("op_12294_axes_0"), val = tensor([2])]; tensor var_12294 = squeeze(axes = var_12294_axes_0, x = e_45)[name = string("op_12294")]; tensor var_12295 = const()[name = string("op_12295"), val = tensor([0, 2, 1])]; tensor var_12296 = transpose(perm = var_12295, x = var_12294)[name = string("transpose_30")]; tensor hidden_states_139_cast_fp16 = add(x = hidden_states_137_cast_fp16, y = var_12296)[name = string("hidden_states_139_cast_fp16")]; tensor mean_185_axes_0 = const()[name = string("mean_185_axes_0"), val = tensor([-1])]; bool mean_185_keep_dims_0 = const()[name = string("mean_185_keep_dims_0"), val = bool(true)]; tensor mean_185_cast_fp16 = reduce_mean(axes = mean_185_axes_0, keep_dims = mean_185_keep_dims_0, x = hidden_states_139_cast_fp16)[name = string("mean_185_cast_fp16")]; tensor input_415_cast_fp16 = sub(x = hidden_states_139_cast_fp16, y = mean_185_cast_fp16)[name = string("input_415_cast_fp16")]; tensor var_12314_axes_0 = const()[name = string("op_12314_axes_0"), val = tensor([-1])]; tensor model_model_layers_23_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_23_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1084032320)))]; fp16 var_12302_to_fp16 = const()[name = string("op_12302_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_12314_cast_fp16 = layer_norm(axes = var_12314_axes_0, epsilon = var_12302_to_fp16, gamma = model_model_layers_23_input_layernorm_weight_to_fp16, x = input_415_cast_fp16)[name = string("op_12314_cast_fp16")]; tensor var_12320 = const()[name = string("op_12320"), val = tensor([0, 2, 1])]; tensor var_12323_axes_0 = const()[name = string("op_12323_axes_0"), val = tensor([2])]; tensor var_12321 = transpose(perm = var_12320, x = var_12314_cast_fp16)[name = string("transpose_29")]; tensor var_12323 = expand_dims(axes = var_12323_axes_0, x = var_12321)[name = string("op_12323")]; string var_12339_pad_type_0 = const()[name = string("op_12339_pad_type_0"), val = string("valid")]; tensor var_12339_strides_0 = const()[name = string("op_12339_strides_0"), val = tensor([1, 1])]; tensor var_12339_pad_0 = const()[name = string("op_12339_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_12339_dilations_0 = const()[name = string("op_12339_dilations_0"), val = tensor([1, 1])]; int32 var_12339_groups_0 = const()[name = string("op_12339_groups_0"), val = int32(1)]; tensor var_12339 = conv(dilations = var_12339_dilations_0, groups = var_12339_groups_0, pad = var_12339_pad_0, pad_type = var_12339_pad_type_0, strides = var_12339_strides_0, weight = model_model_layers_23_self_attn_q_proj_weight_palettized, x = var_12323)[name = string("op_12339")]; tensor var_12344 = const()[name = string("op_12344"), val = tensor([1, 16, 1, 128])]; tensor var_12345 = reshape(shape = var_12344, x = var_12339)[name = string("op_12345")]; string var_12361_pad_type_0 = const()[name = string("op_12361_pad_type_0"), val = string("valid")]; tensor var_12361_strides_0 = const()[name = string("op_12361_strides_0"), val = tensor([1, 1])]; tensor var_12361_pad_0 = const()[name = string("op_12361_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_12361_dilations_0 = const()[name = string("op_12361_dilations_0"), val = tensor([1, 1])]; int32 var_12361_groups_0 = const()[name = string("op_12361_groups_0"), val = int32(1)]; tensor var_12361 = conv(dilations = var_12361_dilations_0, groups = var_12361_groups_0, pad = var_12361_pad_0, pad_type = var_12361_pad_type_0, strides = var_12361_strides_0, weight = model_model_layers_23_self_attn_k_proj_weight_palettized, x = var_12323)[name = string("op_12361")]; tensor var_12366 = const()[name = string("op_12366"), val = tensor([1, 8, 1, 128])]; tensor var_12367 = reshape(shape = var_12366, x = var_12361)[name = string("op_12367")]; string var_12383_pad_type_0 = const()[name = string("op_12383_pad_type_0"), val = string("valid")]; tensor var_12383_strides_0 = const()[name = string("op_12383_strides_0"), val = tensor([1, 1])]; tensor var_12383_pad_0 = const()[name = string("op_12383_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_12383_dilations_0 = const()[name = string("op_12383_dilations_0"), val = tensor([1, 1])]; int32 var_12383_groups_0 = const()[name = string("op_12383_groups_0"), val = int32(1)]; tensor var_12383 = conv(dilations = var_12383_dilations_0, groups = var_12383_groups_0, pad = var_12383_pad_0, pad_type = var_12383_pad_type_0, strides = var_12383_strides_0, weight = model_model_layers_23_self_attn_v_proj_weight_palettized, x = var_12323)[name = string("op_12383")]; tensor var_12388 = const()[name = string("op_12388"), val = tensor([1, 8, 1, 128])]; tensor var_12389 = reshape(shape = var_12388, x = var_12383)[name = string("op_12389")]; tensor mean_187_axes_0 = const()[name = string("mean_187_axes_0"), val = tensor([-1])]; bool mean_187_keep_dims_0 = const()[name = string("mean_187_keep_dims_0"), val = bool(true)]; tensor mean_187 = reduce_mean(axes = mean_187_axes_0, keep_dims = mean_187_keep_dims_0, x = var_12345)[name = string("mean_187")]; tensor input_419 = sub(x = var_12345, y = mean_187)[name = string("input_419")]; tensor var_12410_axes_0 = const()[name = string("op_12410_axes_0"), val = tensor([-1])]; tensor model_model_layers_23_self_attn_q_norm_weight_to_fp16 = const()[name = string("model_model_layers_23_self_attn_q_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1084036480)))]; fp16 var_12398_to_fp16 = const()[name = string("op_12398_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_12410_cast_fp16 = layer_norm(axes = var_12410_axes_0, epsilon = var_12398_to_fp16, gamma = model_model_layers_23_self_attn_q_norm_weight_to_fp16, x = input_419)[name = string("op_12410_cast_fp16")]; tensor mean_189_axes_0 = const()[name = string("mean_189_axes_0"), val = tensor([-1])]; bool mean_189_keep_dims_0 = const()[name = string("mean_189_keep_dims_0"), val = bool(true)]; tensor mean_189 = reduce_mean(axes = mean_189_axes_0, keep_dims = mean_189_keep_dims_0, x = var_12367)[name = string("mean_189")]; tensor input_421 = sub(x = var_12367, y = mean_189)[name = string("input_421")]; tensor var_12428_axes_0 = const()[name = string("op_12428_axes_0"), val = tensor([-1])]; tensor model_model_layers_23_self_attn_k_norm_weight_to_fp16 = const()[name = string("model_model_layers_23_self_attn_k_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1084036800)))]; fp16 var_12416_to_fp16 = const()[name = string("op_12416_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_12428_cast_fp16 = layer_norm(axes = var_12428_axes_0, epsilon = var_12416_to_fp16, gamma = model_model_layers_23_self_attn_k_norm_weight_to_fp16, x = input_421)[name = string("op_12428_cast_fp16")]; tensor var_12431 = mul(x = var_12410_cast_fp16, y = cos_1_cast_fp16)[name = string("op_12431")]; tensor x1_93_begin_0 = const()[name = string("x1_93_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_93_end_0 = const()[name = string("x1_93_end_0"), val = tensor([1, 16, 1, 64])]; tensor x1_93_end_mask_0 = const()[name = string("x1_93_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_93 = slice_by_index(begin = x1_93_begin_0, end = x1_93_end_0, end_mask = x1_93_end_mask_0, x = var_12410_cast_fp16)[name = string("x1_93")]; tensor x2_93_begin_0 = const()[name = string("x2_93_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_93_end_0 = const()[name = string("x2_93_end_0"), val = tensor([1, 16, 1, 128])]; tensor x2_93_end_mask_0 = const()[name = string("x2_93_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_93 = slice_by_index(begin = x2_93_begin_0, end = x2_93_end_0, end_mask = x2_93_end_mask_0, x = var_12410_cast_fp16)[name = string("x2_93")]; fp16 const_419_promoted = const()[name = string("const_419_promoted"), val = fp16(-0x1p+0)]; tensor var_12452 = mul(x = x2_93, y = const_419_promoted)[name = string("op_12452")]; int32 var_12454 = const()[name = string("op_12454"), val = int32(-1)]; bool var_12455_interleave_0 = const()[name = string("op_12455_interleave_0"), val = bool(false)]; tensor var_12455 = concat(axis = var_12454, interleave = var_12455_interleave_0, values = (var_12452, x1_93))[name = string("op_12455")]; tensor var_12456 = mul(x = var_12455, y = sin_1_cast_fp16)[name = string("op_12456")]; tensor query_states_93 = add(x = var_12431, y = var_12456)[name = string("query_states_93")]; tensor var_12459 = mul(x = var_12428_cast_fp16, y = cos_1_cast_fp16)[name = string("op_12459")]; tensor x1_95_begin_0 = const()[name = string("x1_95_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_95_end_0 = const()[name = string("x1_95_end_0"), val = tensor([1, 8, 1, 64])]; tensor x1_95_end_mask_0 = const()[name = string("x1_95_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_95 = slice_by_index(begin = x1_95_begin_0, end = x1_95_end_0, end_mask = x1_95_end_mask_0, x = var_12428_cast_fp16)[name = string("x1_95")]; tensor x2_95_begin_0 = const()[name = string("x2_95_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_95_end_0 = const()[name = string("x2_95_end_0"), val = tensor([1, 8, 1, 128])]; tensor x2_95_end_mask_0 = const()[name = string("x2_95_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_95 = slice_by_index(begin = x2_95_begin_0, end = x2_95_end_0, end_mask = x2_95_end_mask_0, x = var_12428_cast_fp16)[name = string("x2_95")]; fp16 const_422_promoted = const()[name = string("const_422_promoted"), val = fp16(-0x1p+0)]; tensor var_12480 = mul(x = x2_95, y = const_422_promoted)[name = string("op_12480")]; int32 var_12482 = const()[name = string("op_12482"), val = int32(-1)]; bool var_12483_interleave_0 = const()[name = string("op_12483_interleave_0"), val = bool(false)]; tensor var_12483 = concat(axis = var_12482, interleave = var_12483_interleave_0, values = (var_12480, x1_95))[name = string("op_12483")]; tensor var_12484 = mul(x = var_12483, y = sin_1_cast_fp16)[name = string("op_12484")]; tensor key_states_93 = add(x = var_12459, y = var_12484)[name = string("key_states_93")]; tensor expand_dims_276 = const()[name = string("expand_dims_276"), val = tensor([23])]; tensor expand_dims_277 = const()[name = string("expand_dims_277"), val = tensor([0])]; tensor expand_dims_279 = const()[name = string("expand_dims_279"), val = tensor([0])]; tensor expand_dims_280 = const()[name = string("expand_dims_280"), val = tensor([24])]; int32 concat_186_axis_0 = const()[name = string("concat_186_axis_0"), val = int32(0)]; bool concat_186_interleave_0 = const()[name = string("concat_186_interleave_0"), val = bool(false)]; tensor concat_186 = concat(axis = concat_186_axis_0, interleave = concat_186_interleave_0, values = (expand_dims_276, expand_dims_277, current_pos, expand_dims_279))[name = string("concat_186")]; tensor concat_187_values1_0 = const()[name = string("concat_187_values1_0"), val = tensor([0])]; tensor concat_187_values3_0 = const()[name = string("concat_187_values3_0"), val = tensor([0])]; int32 concat_187_axis_0 = const()[name = string("concat_187_axis_0"), val = int32(0)]; bool concat_187_interleave_0 = const()[name = string("concat_187_interleave_0"), val = bool(false)]; tensor concat_187 = concat(axis = concat_187_axis_0, interleave = concat_187_interleave_0, values = (expand_dims_280, concat_187_values1_0, var_1725, concat_187_values3_0))[name = string("concat_187")]; tensor model_model_kv_cache_0_internal_tensor_assign_47_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_47_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_47_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_47_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_47_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_47_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_47_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_47_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_47_cast_fp16 = slice_update(begin = concat_186, begin_mask = model_model_kv_cache_0_internal_tensor_assign_47_begin_mask_0, end = concat_187, end_mask = model_model_kv_cache_0_internal_tensor_assign_47_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_47_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_47_stride_0, update = key_states_93, x = coreml_update_state_101)[name = string("model_model_kv_cache_0_internal_tensor_assign_47_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_47_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_46_write_state")]; tensor coreml_update_state_102 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_46")]; tensor expand_dims_282 = const()[name = string("expand_dims_282"), val = tensor([51])]; tensor expand_dims_283 = const()[name = string("expand_dims_283"), val = tensor([0])]; tensor expand_dims_285 = const()[name = string("expand_dims_285"), val = tensor([0])]; tensor expand_dims_286 = const()[name = string("expand_dims_286"), val = tensor([52])]; int32 concat_190_axis_0 = const()[name = string("concat_190_axis_0"), val = int32(0)]; bool concat_190_interleave_0 = const()[name = string("concat_190_interleave_0"), val = bool(false)]; tensor concat_190 = concat(axis = concat_190_axis_0, interleave = concat_190_interleave_0, values = (expand_dims_282, expand_dims_283, current_pos, expand_dims_285))[name = string("concat_190")]; tensor concat_191_values1_0 = const()[name = string("concat_191_values1_0"), val = tensor([0])]; tensor concat_191_values3_0 = const()[name = string("concat_191_values3_0"), val = tensor([0])]; int32 concat_191_axis_0 = const()[name = string("concat_191_axis_0"), val = int32(0)]; bool concat_191_interleave_0 = const()[name = string("concat_191_interleave_0"), val = bool(false)]; tensor concat_191 = concat(axis = concat_191_axis_0, interleave = concat_191_interleave_0, values = (expand_dims_286, concat_191_values1_0, var_1725, concat_191_values3_0))[name = string("concat_191")]; tensor model_model_kv_cache_0_internal_tensor_assign_48_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_48_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_48_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_48_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_48_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_48_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_48_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_48_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_48_cast_fp16 = slice_update(begin = concat_190, begin_mask = model_model_kv_cache_0_internal_tensor_assign_48_begin_mask_0, end = concat_191, end_mask = model_model_kv_cache_0_internal_tensor_assign_48_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_48_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_48_stride_0, update = var_12389, x = coreml_update_state_102)[name = string("model_model_kv_cache_0_internal_tensor_assign_48_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_48_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_47_write_state")]; tensor coreml_update_state_103 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_47")]; tensor var_12539_begin_0 = const()[name = string("op_12539_begin_0"), val = tensor([23, 0, 0, 0])]; tensor var_12539_end_0 = const()[name = string("op_12539_end_0"), val = tensor([24, 8, 1024, 128])]; tensor var_12539_end_mask_0 = const()[name = string("op_12539_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_12539_cast_fp16 = slice_by_index(begin = var_12539_begin_0, end = var_12539_end_0, end_mask = var_12539_end_mask_0, x = coreml_update_state_103)[name = string("op_12539_cast_fp16")]; tensor K_layer_cache_47_axes_0 = const()[name = string("K_layer_cache_47_axes_0"), val = tensor([0])]; tensor K_layer_cache_47_cast_fp16 = squeeze(axes = K_layer_cache_47_axes_0, x = var_12539_cast_fp16)[name = string("K_layer_cache_47_cast_fp16")]; tensor var_12546_begin_0 = const()[name = string("op_12546_begin_0"), val = tensor([51, 0, 0, 0])]; tensor var_12546_end_0 = const()[name = string("op_12546_end_0"), val = tensor([52, 8, 1024, 128])]; tensor var_12546_end_mask_0 = const()[name = string("op_12546_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_12546_cast_fp16 = slice_by_index(begin = var_12546_begin_0, end = var_12546_end_0, end_mask = var_12546_end_mask_0, x = coreml_update_state_103)[name = string("op_12546_cast_fp16")]; tensor V_layer_cache_47_axes_0 = const()[name = string("V_layer_cache_47_axes_0"), val = tensor([0])]; tensor V_layer_cache_47_cast_fp16 = squeeze(axes = V_layer_cache_47_axes_0, x = var_12546_cast_fp16)[name = string("V_layer_cache_47_cast_fp16")]; tensor x_467_axes_0 = const()[name = string("x_467_axes_0"), val = tensor([1])]; tensor x_467_cast_fp16 = expand_dims(axes = x_467_axes_0, x = K_layer_cache_47_cast_fp16)[name = string("x_467_cast_fp16")]; tensor var_12583 = const()[name = string("op_12583"), val = tensor([1, 2, 1, 1])]; tensor x_469_cast_fp16 = tile(reps = var_12583, x = x_467_cast_fp16)[name = string("x_469_cast_fp16")]; tensor var_12595 = const()[name = string("op_12595"), val = tensor([1, -1, 1024, 128])]; tensor key_states_95_cast_fp16 = reshape(shape = var_12595, x = x_469_cast_fp16)[name = string("key_states_95_cast_fp16")]; tensor x_473_axes_0 = const()[name = string("x_473_axes_0"), val = tensor([1])]; tensor x_473_cast_fp16 = expand_dims(axes = x_473_axes_0, x = V_layer_cache_47_cast_fp16)[name = string("x_473_cast_fp16")]; tensor var_12603 = const()[name = string("op_12603"), val = tensor([1, 2, 1, 1])]; tensor x_475_cast_fp16 = tile(reps = var_12603, x = x_473_cast_fp16)[name = string("x_475_cast_fp16")]; tensor var_12615 = const()[name = string("op_12615"), val = tensor([1, -1, 1024, 128])]; tensor value_states_141_cast_fp16 = reshape(shape = var_12615, x = x_475_cast_fp16)[name = string("value_states_141_cast_fp16")]; bool var_12630_transpose_x_1 = const()[name = string("op_12630_transpose_x_1"), val = bool(false)]; bool var_12630_transpose_y_1 = const()[name = string("op_12630_transpose_y_1"), val = bool(true)]; tensor var_12630 = matmul(transpose_x = var_12630_transpose_x_1, transpose_y = var_12630_transpose_y_1, x = query_states_93, y = key_states_95_cast_fp16)[name = string("op_12630")]; fp16 var_12631_to_fp16 = const()[name = string("op_12631_to_fp16"), val = fp16(0x1.6ap-4)]; tensor attn_weights_139_cast_fp16 = mul(x = var_12630, y = var_12631_to_fp16)[name = string("attn_weights_139_cast_fp16")]; tensor attn_weights_141_cast_fp16 = add(x = attn_weights_139_cast_fp16, y = causal_mask)[name = string("attn_weights_141_cast_fp16")]; int32 var_12666 = const()[name = string("op_12666"), val = int32(-1)]; tensor attn_weights_143_cast_fp16 = softmax(axis = var_12666, x = attn_weights_141_cast_fp16)[name = string("attn_weights_143_cast_fp16")]; bool attn_output_231_transpose_x_0 = const()[name = string("attn_output_231_transpose_x_0"), val = bool(false)]; bool attn_output_231_transpose_y_0 = const()[name = string("attn_output_231_transpose_y_0"), val = bool(false)]; tensor attn_output_231_cast_fp16 = matmul(transpose_x = attn_output_231_transpose_x_0, transpose_y = attn_output_231_transpose_y_0, x = attn_weights_143_cast_fp16, y = value_states_141_cast_fp16)[name = string("attn_output_231_cast_fp16")]; tensor var_12677_perm_0 = const()[name = string("op_12677_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_12681 = const()[name = string("op_12681"), val = tensor([1, 1, 2048])]; tensor var_12677_cast_fp16 = transpose(perm = var_12677_perm_0, x = attn_output_231_cast_fp16)[name = string("transpose_28")]; tensor attn_output_235_cast_fp16 = reshape(shape = var_12681, x = var_12677_cast_fp16)[name = string("attn_output_235_cast_fp16")]; tensor var_12686 = const()[name = string("op_12686"), val = tensor([0, 2, 1])]; string var_12702_pad_type_0 = const()[name = string("op_12702_pad_type_0"), val = string("valid")]; int32 var_12702_groups_0 = const()[name = string("op_12702_groups_0"), val = int32(1)]; tensor var_12702_strides_0 = const()[name = string("op_12702_strides_0"), val = tensor([1])]; tensor var_12702_pad_0 = const()[name = string("op_12702_pad_0"), val = tensor([0, 0])]; tensor var_12702_dilations_0 = const()[name = string("op_12702_dilations_0"), val = tensor([1])]; tensor squeeze_23_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1084037120))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1087182912))))[name = string("squeeze_23_cast_fp16_to_fp32_to_fp16_palettized")]; tensor var_12687_cast_fp16 = transpose(perm = var_12686, x = attn_output_235_cast_fp16)[name = string("transpose_27")]; tensor var_12702_cast_fp16 = conv(dilations = var_12702_dilations_0, groups = var_12702_groups_0, pad = var_12702_pad_0, pad_type = var_12702_pad_type_0, strides = var_12702_strides_0, weight = squeeze_23_cast_fp16_to_fp32_to_fp16_palettized, x = var_12687_cast_fp16)[name = string("op_12702_cast_fp16")]; tensor var_12706 = const()[name = string("op_12706"), val = tensor([0, 2, 1])]; tensor attn_output_239_cast_fp16 = transpose(perm = var_12706, x = var_12702_cast_fp16)[name = string("transpose_26")]; tensor hidden_states_143_cast_fp16 = add(x = hidden_states_139_cast_fp16, y = attn_output_239_cast_fp16)[name = string("hidden_states_143_cast_fp16")]; tensor mean_191_axes_0 = const()[name = string("mean_191_axes_0"), val = tensor([-1])]; bool mean_191_keep_dims_0 = const()[name = string("mean_191_keep_dims_0"), val = bool(true)]; tensor mean_191_cast_fp16 = reduce_mean(axes = mean_191_axes_0, keep_dims = mean_191_keep_dims_0, x = hidden_states_143_cast_fp16)[name = string("mean_191_cast_fp16")]; tensor input_425_cast_fp16 = sub(x = hidden_states_143_cast_fp16, y = mean_191_cast_fp16)[name = string("input_425_cast_fp16")]; tensor var_12725_axes_0 = const()[name = string("op_12725_axes_0"), val = tensor([-1])]; tensor model_model_layers_23_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_23_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1087215744)))]; fp16 var_12713_to_fp16 = const()[name = string("op_12713_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_12725_cast_fp16 = layer_norm(axes = var_12725_axes_0, epsilon = var_12713_to_fp16, gamma = model_model_layers_23_post_attention_layernorm_weight_to_fp16, x = input_425_cast_fp16)[name = string("op_12725_cast_fp16")]; tensor var_12739 = const()[name = string("op_12739"), val = tensor([0, 2, 1])]; tensor input_427_axes_0 = const()[name = string("input_427_axes_0"), val = tensor([2])]; tensor var_12740 = transpose(perm = var_12739, x = var_12725_cast_fp16)[name = string("transpose_25")]; tensor input_427 = expand_dims(axes = input_427_axes_0, x = var_12740)[name = string("input_427")]; string input_429_pad_type_0 = const()[name = string("input_429_pad_type_0"), val = string("valid")]; tensor input_429_strides_0 = const()[name = string("input_429_strides_0"), val = tensor([1, 1])]; tensor input_429_pad_0 = const()[name = string("input_429_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_429_dilations_0 = const()[name = string("input_429_dilations_0"), val = tensor([1, 1])]; int32 input_429_groups_0 = const()[name = string("input_429_groups_0"), val = int32(1)]; tensor input_429 = conv(dilations = input_429_dilations_0, groups = input_429_groups_0, pad = input_429_pad_0, pad_type = input_429_pad_type_0, strides = input_429_strides_0, weight = model_model_layers_23_mlp_gate_proj_weight_palettized, x = input_427)[name = string("input_429")]; string b_47_pad_type_0 = const()[name = string("b_47_pad_type_0"), val = string("valid")]; tensor b_47_strides_0 = const()[name = string("b_47_strides_0"), val = tensor([1, 1])]; tensor b_47_pad_0 = const()[name = string("b_47_pad_0"), val = tensor([0, 0, 0, 0])]; tensor b_47_dilations_0 = const()[name = string("b_47_dilations_0"), val = tensor([1, 1])]; int32 b_47_groups_0 = const()[name = string("b_47_groups_0"), val = int32(1)]; tensor b_47 = conv(dilations = b_47_dilations_0, groups = b_47_groups_0, pad = b_47_pad_0, pad_type = b_47_pad_type_0, strides = b_47_strides_0, weight = model_model_layers_23_mlp_up_proj_weight_palettized, x = input_427)[name = string("b_47")]; tensor c_47 = silu(x = input_429)[name = string("c_47")]; tensor input_431 = mul(x = c_47, y = b_47)[name = string("input_431")]; string e_47_pad_type_0 = const()[name = string("e_47_pad_type_0"), val = string("valid")]; tensor e_47_strides_0 = const()[name = string("e_47_strides_0"), val = tensor([1, 1])]; tensor e_47_pad_0 = const()[name = string("e_47_pad_0"), val = tensor([0, 0, 0, 0])]; tensor e_47_dilations_0 = const()[name = string("e_47_dilations_0"), val = tensor([1, 1])]; int32 e_47_groups_0 = const()[name = string("e_47_groups_0"), val = int32(1)]; tensor e_47 = conv(dilations = e_47_dilations_0, groups = e_47_groups_0, pad = e_47_pad_0, pad_type = e_47_pad_type_0, strides = e_47_strides_0, weight = model_model_layers_23_mlp_down_proj_weight_palettized, x = input_431)[name = string("e_47")]; tensor var_12762_axes_0 = const()[name = string("op_12762_axes_0"), val = tensor([2])]; tensor var_12762 = squeeze(axes = var_12762_axes_0, x = e_47)[name = string("op_12762")]; tensor var_12763 = const()[name = string("op_12763"), val = tensor([0, 2, 1])]; tensor var_12764 = transpose(perm = var_12763, x = var_12762)[name = string("transpose_24")]; tensor hidden_states_145_cast_fp16 = add(x = hidden_states_143_cast_fp16, y = var_12764)[name = string("hidden_states_145_cast_fp16")]; tensor mean_193_axes_0 = const()[name = string("mean_193_axes_0"), val = tensor([-1])]; bool mean_193_keep_dims_0 = const()[name = string("mean_193_keep_dims_0"), val = bool(true)]; tensor mean_193_cast_fp16 = reduce_mean(axes = mean_193_axes_0, keep_dims = mean_193_keep_dims_0, x = hidden_states_145_cast_fp16)[name = string("mean_193_cast_fp16")]; tensor input_433_cast_fp16 = sub(x = hidden_states_145_cast_fp16, y = mean_193_cast_fp16)[name = string("input_433_cast_fp16")]; tensor var_12782_axes_0 = const()[name = string("op_12782_axes_0"), val = tensor([-1])]; tensor model_model_layers_24_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_24_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1087219904)))]; fp16 var_12770_to_fp16 = const()[name = string("op_12770_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_12782_cast_fp16 = layer_norm(axes = var_12782_axes_0, epsilon = var_12770_to_fp16, gamma = model_model_layers_24_input_layernorm_weight_to_fp16, x = input_433_cast_fp16)[name = string("op_12782_cast_fp16")]; tensor var_12788 = const()[name = string("op_12788"), val = tensor([0, 2, 1])]; tensor var_12791_axes_0 = const()[name = string("op_12791_axes_0"), val = tensor([2])]; tensor var_12789 = transpose(perm = var_12788, x = var_12782_cast_fp16)[name = string("transpose_23")]; tensor var_12791 = expand_dims(axes = var_12791_axes_0, x = var_12789)[name = string("op_12791")]; string var_12807_pad_type_0 = const()[name = string("op_12807_pad_type_0"), val = string("valid")]; tensor var_12807_strides_0 = const()[name = string("op_12807_strides_0"), val = tensor([1, 1])]; tensor var_12807_pad_0 = const()[name = string("op_12807_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_12807_dilations_0 = const()[name = string("op_12807_dilations_0"), val = tensor([1, 1])]; int32 var_12807_groups_0 = const()[name = string("op_12807_groups_0"), val = int32(1)]; tensor var_12807 = conv(dilations = var_12807_dilations_0, groups = var_12807_groups_0, pad = var_12807_pad_0, pad_type = var_12807_pad_type_0, strides = var_12807_strides_0, weight = model_model_layers_24_self_attn_q_proj_weight_palettized, x = var_12791)[name = string("op_12807")]; tensor var_12812 = const()[name = string("op_12812"), val = tensor([1, 16, 1, 128])]; tensor var_12813 = reshape(shape = var_12812, x = var_12807)[name = string("op_12813")]; string var_12829_pad_type_0 = const()[name = string("op_12829_pad_type_0"), val = string("valid")]; tensor var_12829_strides_0 = const()[name = string("op_12829_strides_0"), val = tensor([1, 1])]; tensor var_12829_pad_0 = const()[name = string("op_12829_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_12829_dilations_0 = const()[name = string("op_12829_dilations_0"), val = tensor([1, 1])]; int32 var_12829_groups_0 = const()[name = string("op_12829_groups_0"), val = int32(1)]; tensor var_12829 = conv(dilations = var_12829_dilations_0, groups = var_12829_groups_0, pad = var_12829_pad_0, pad_type = var_12829_pad_type_0, strides = var_12829_strides_0, weight = model_model_layers_24_self_attn_k_proj_weight_palettized, x = var_12791)[name = string("op_12829")]; tensor var_12834 = const()[name = string("op_12834"), val = tensor([1, 8, 1, 128])]; tensor var_12835 = reshape(shape = var_12834, x = var_12829)[name = string("op_12835")]; string var_12851_pad_type_0 = const()[name = string("op_12851_pad_type_0"), val = string("valid")]; tensor var_12851_strides_0 = const()[name = string("op_12851_strides_0"), val = tensor([1, 1])]; tensor var_12851_pad_0 = const()[name = string("op_12851_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_12851_dilations_0 = const()[name = string("op_12851_dilations_0"), val = tensor([1, 1])]; int32 var_12851_groups_0 = const()[name = string("op_12851_groups_0"), val = int32(1)]; tensor var_12851 = conv(dilations = var_12851_dilations_0, groups = var_12851_groups_0, pad = var_12851_pad_0, pad_type = var_12851_pad_type_0, strides = var_12851_strides_0, weight = model_model_layers_24_self_attn_v_proj_weight_palettized, x = var_12791)[name = string("op_12851")]; tensor var_12856 = const()[name = string("op_12856"), val = tensor([1, 8, 1, 128])]; tensor var_12857 = reshape(shape = var_12856, x = var_12851)[name = string("op_12857")]; tensor mean_195_axes_0 = const()[name = string("mean_195_axes_0"), val = tensor([-1])]; bool mean_195_keep_dims_0 = const()[name = string("mean_195_keep_dims_0"), val = bool(true)]; tensor mean_195 = reduce_mean(axes = mean_195_axes_0, keep_dims = mean_195_keep_dims_0, x = var_12813)[name = string("mean_195")]; tensor input_437 = sub(x = var_12813, y = mean_195)[name = string("input_437")]; tensor var_12878_axes_0 = const()[name = string("op_12878_axes_0"), val = tensor([-1])]; tensor model_model_layers_24_self_attn_q_norm_weight_to_fp16 = const()[name = string("model_model_layers_24_self_attn_q_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1087224064)))]; fp16 var_12866_to_fp16 = const()[name = string("op_12866_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_12878_cast_fp16 = layer_norm(axes = var_12878_axes_0, epsilon = var_12866_to_fp16, gamma = model_model_layers_24_self_attn_q_norm_weight_to_fp16, x = input_437)[name = string("op_12878_cast_fp16")]; tensor mean_197_axes_0 = const()[name = string("mean_197_axes_0"), val = tensor([-1])]; bool mean_197_keep_dims_0 = const()[name = string("mean_197_keep_dims_0"), val = bool(true)]; tensor mean_197 = reduce_mean(axes = mean_197_axes_0, keep_dims = mean_197_keep_dims_0, x = var_12835)[name = string("mean_197")]; tensor input_439 = sub(x = var_12835, y = mean_197)[name = string("input_439")]; tensor var_12896_axes_0 = const()[name = string("op_12896_axes_0"), val = tensor([-1])]; tensor model_model_layers_24_self_attn_k_norm_weight_to_fp16 = const()[name = string("model_model_layers_24_self_attn_k_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1087224384)))]; fp16 var_12884_to_fp16 = const()[name = string("op_12884_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_12896_cast_fp16 = layer_norm(axes = var_12896_axes_0, epsilon = var_12884_to_fp16, gamma = model_model_layers_24_self_attn_k_norm_weight_to_fp16, x = input_439)[name = string("op_12896_cast_fp16")]; tensor var_12899 = mul(x = var_12878_cast_fp16, y = cos_1_cast_fp16)[name = string("op_12899")]; tensor x1_97_begin_0 = const()[name = string("x1_97_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_97_end_0 = const()[name = string("x1_97_end_0"), val = tensor([1, 16, 1, 64])]; tensor x1_97_end_mask_0 = const()[name = string("x1_97_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_97 = slice_by_index(begin = x1_97_begin_0, end = x1_97_end_0, end_mask = x1_97_end_mask_0, x = var_12878_cast_fp16)[name = string("x1_97")]; tensor x2_97_begin_0 = const()[name = string("x2_97_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_97_end_0 = const()[name = string("x2_97_end_0"), val = tensor([1, 16, 1, 128])]; tensor x2_97_end_mask_0 = const()[name = string("x2_97_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_97 = slice_by_index(begin = x2_97_begin_0, end = x2_97_end_0, end_mask = x2_97_end_mask_0, x = var_12878_cast_fp16)[name = string("x2_97")]; fp16 const_437_promoted = const()[name = string("const_437_promoted"), val = fp16(-0x1p+0)]; tensor var_12920 = mul(x = x2_97, y = const_437_promoted)[name = string("op_12920")]; int32 var_12922 = const()[name = string("op_12922"), val = int32(-1)]; bool var_12923_interleave_0 = const()[name = string("op_12923_interleave_0"), val = bool(false)]; tensor var_12923 = concat(axis = var_12922, interleave = var_12923_interleave_0, values = (var_12920, x1_97))[name = string("op_12923")]; tensor var_12924 = mul(x = var_12923, y = sin_1_cast_fp16)[name = string("op_12924")]; tensor query_states_97 = add(x = var_12899, y = var_12924)[name = string("query_states_97")]; tensor var_12927 = mul(x = var_12896_cast_fp16, y = cos_1_cast_fp16)[name = string("op_12927")]; tensor x1_99_begin_0 = const()[name = string("x1_99_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_99_end_0 = const()[name = string("x1_99_end_0"), val = tensor([1, 8, 1, 64])]; tensor x1_99_end_mask_0 = const()[name = string("x1_99_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_99 = slice_by_index(begin = x1_99_begin_0, end = x1_99_end_0, end_mask = x1_99_end_mask_0, x = var_12896_cast_fp16)[name = string("x1_99")]; tensor x2_99_begin_0 = const()[name = string("x2_99_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_99_end_0 = const()[name = string("x2_99_end_0"), val = tensor([1, 8, 1, 128])]; tensor x2_99_end_mask_0 = const()[name = string("x2_99_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_99 = slice_by_index(begin = x2_99_begin_0, end = x2_99_end_0, end_mask = x2_99_end_mask_0, x = var_12896_cast_fp16)[name = string("x2_99")]; fp16 const_440_promoted = const()[name = string("const_440_promoted"), val = fp16(-0x1p+0)]; tensor var_12948 = mul(x = x2_99, y = const_440_promoted)[name = string("op_12948")]; int32 var_12950 = const()[name = string("op_12950"), val = int32(-1)]; bool var_12951_interleave_0 = const()[name = string("op_12951_interleave_0"), val = bool(false)]; tensor var_12951 = concat(axis = var_12950, interleave = var_12951_interleave_0, values = (var_12948, x1_99))[name = string("op_12951")]; tensor var_12952 = mul(x = var_12951, y = sin_1_cast_fp16)[name = string("op_12952")]; tensor key_states_97 = add(x = var_12927, y = var_12952)[name = string("key_states_97")]; tensor expand_dims_288 = const()[name = string("expand_dims_288"), val = tensor([24])]; tensor expand_dims_289 = const()[name = string("expand_dims_289"), val = tensor([0])]; tensor expand_dims_291 = const()[name = string("expand_dims_291"), val = tensor([0])]; tensor expand_dims_292 = const()[name = string("expand_dims_292"), val = tensor([25])]; int32 concat_194_axis_0 = const()[name = string("concat_194_axis_0"), val = int32(0)]; bool concat_194_interleave_0 = const()[name = string("concat_194_interleave_0"), val = bool(false)]; tensor concat_194 = concat(axis = concat_194_axis_0, interleave = concat_194_interleave_0, values = (expand_dims_288, expand_dims_289, current_pos, expand_dims_291))[name = string("concat_194")]; tensor concat_195_values1_0 = const()[name = string("concat_195_values1_0"), val = tensor([0])]; tensor concat_195_values3_0 = const()[name = string("concat_195_values3_0"), val = tensor([0])]; int32 concat_195_axis_0 = const()[name = string("concat_195_axis_0"), val = int32(0)]; bool concat_195_interleave_0 = const()[name = string("concat_195_interleave_0"), val = bool(false)]; tensor concat_195 = concat(axis = concat_195_axis_0, interleave = concat_195_interleave_0, values = (expand_dims_292, concat_195_values1_0, var_1725, concat_195_values3_0))[name = string("concat_195")]; tensor model_model_kv_cache_0_internal_tensor_assign_49_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_49_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_49_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_49_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_49_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_49_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_49_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_49_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_49_cast_fp16 = slice_update(begin = concat_194, begin_mask = model_model_kv_cache_0_internal_tensor_assign_49_begin_mask_0, end = concat_195, end_mask = model_model_kv_cache_0_internal_tensor_assign_49_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_49_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_49_stride_0, update = key_states_97, x = coreml_update_state_103)[name = string("model_model_kv_cache_0_internal_tensor_assign_49_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_49_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_48_write_state")]; tensor coreml_update_state_104 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_48")]; tensor expand_dims_294 = const()[name = string("expand_dims_294"), val = tensor([52])]; tensor expand_dims_295 = const()[name = string("expand_dims_295"), val = tensor([0])]; tensor expand_dims_297 = const()[name = string("expand_dims_297"), val = tensor([0])]; tensor expand_dims_298 = const()[name = string("expand_dims_298"), val = tensor([53])]; int32 concat_198_axis_0 = const()[name = string("concat_198_axis_0"), val = int32(0)]; bool concat_198_interleave_0 = const()[name = string("concat_198_interleave_0"), val = bool(false)]; tensor concat_198 = concat(axis = concat_198_axis_0, interleave = concat_198_interleave_0, values = (expand_dims_294, expand_dims_295, current_pos, expand_dims_297))[name = string("concat_198")]; tensor concat_199_values1_0 = const()[name = string("concat_199_values1_0"), val = tensor([0])]; tensor concat_199_values3_0 = const()[name = string("concat_199_values3_0"), val = tensor([0])]; int32 concat_199_axis_0 = const()[name = string("concat_199_axis_0"), val = int32(0)]; bool concat_199_interleave_0 = const()[name = string("concat_199_interleave_0"), val = bool(false)]; tensor concat_199 = concat(axis = concat_199_axis_0, interleave = concat_199_interleave_0, values = (expand_dims_298, concat_199_values1_0, var_1725, concat_199_values3_0))[name = string("concat_199")]; tensor model_model_kv_cache_0_internal_tensor_assign_50_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_50_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_50_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_50_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_50_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_50_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_50_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_50_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_50_cast_fp16 = slice_update(begin = concat_198, begin_mask = model_model_kv_cache_0_internal_tensor_assign_50_begin_mask_0, end = concat_199, end_mask = model_model_kv_cache_0_internal_tensor_assign_50_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_50_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_50_stride_0, update = var_12857, x = coreml_update_state_104)[name = string("model_model_kv_cache_0_internal_tensor_assign_50_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_50_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_49_write_state")]; tensor coreml_update_state_105 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_49")]; tensor var_13007_begin_0 = const()[name = string("op_13007_begin_0"), val = tensor([24, 0, 0, 0])]; tensor var_13007_end_0 = const()[name = string("op_13007_end_0"), val = tensor([25, 8, 1024, 128])]; tensor var_13007_end_mask_0 = const()[name = string("op_13007_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_13007_cast_fp16 = slice_by_index(begin = var_13007_begin_0, end = var_13007_end_0, end_mask = var_13007_end_mask_0, x = coreml_update_state_105)[name = string("op_13007_cast_fp16")]; tensor K_layer_cache_49_axes_0 = const()[name = string("K_layer_cache_49_axes_0"), val = tensor([0])]; tensor K_layer_cache_49_cast_fp16 = squeeze(axes = K_layer_cache_49_axes_0, x = var_13007_cast_fp16)[name = string("K_layer_cache_49_cast_fp16")]; tensor var_13014_begin_0 = const()[name = string("op_13014_begin_0"), val = tensor([52, 0, 0, 0])]; tensor var_13014_end_0 = const()[name = string("op_13014_end_0"), val = tensor([53, 8, 1024, 128])]; tensor var_13014_end_mask_0 = const()[name = string("op_13014_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_13014_cast_fp16 = slice_by_index(begin = var_13014_begin_0, end = var_13014_end_0, end_mask = var_13014_end_mask_0, x = coreml_update_state_105)[name = string("op_13014_cast_fp16")]; tensor V_layer_cache_49_axes_0 = const()[name = string("V_layer_cache_49_axes_0"), val = tensor([0])]; tensor V_layer_cache_49_cast_fp16 = squeeze(axes = V_layer_cache_49_axes_0, x = var_13014_cast_fp16)[name = string("V_layer_cache_49_cast_fp16")]; tensor x_487_axes_0 = const()[name = string("x_487_axes_0"), val = tensor([1])]; tensor x_487_cast_fp16 = expand_dims(axes = x_487_axes_0, x = K_layer_cache_49_cast_fp16)[name = string("x_487_cast_fp16")]; tensor var_13051 = const()[name = string("op_13051"), val = tensor([1, 2, 1, 1])]; tensor x_489_cast_fp16 = tile(reps = var_13051, x = x_487_cast_fp16)[name = string("x_489_cast_fp16")]; tensor var_13063 = const()[name = string("op_13063"), val = tensor([1, -1, 1024, 128])]; tensor key_states_99_cast_fp16 = reshape(shape = var_13063, x = x_489_cast_fp16)[name = string("key_states_99_cast_fp16")]; tensor x_493_axes_0 = const()[name = string("x_493_axes_0"), val = tensor([1])]; tensor x_493_cast_fp16 = expand_dims(axes = x_493_axes_0, x = V_layer_cache_49_cast_fp16)[name = string("x_493_cast_fp16")]; tensor var_13071 = const()[name = string("op_13071"), val = tensor([1, 2, 1, 1])]; tensor x_495_cast_fp16 = tile(reps = var_13071, x = x_493_cast_fp16)[name = string("x_495_cast_fp16")]; tensor var_13083 = const()[name = string("op_13083"), val = tensor([1, -1, 1024, 128])]; tensor value_states_147_cast_fp16 = reshape(shape = var_13083, x = x_495_cast_fp16)[name = string("value_states_147_cast_fp16")]; bool var_13098_transpose_x_1 = const()[name = string("op_13098_transpose_x_1"), val = bool(false)]; bool var_13098_transpose_y_1 = const()[name = string("op_13098_transpose_y_1"), val = bool(true)]; tensor var_13098 = matmul(transpose_x = var_13098_transpose_x_1, transpose_y = var_13098_transpose_y_1, x = query_states_97, y = key_states_99_cast_fp16)[name = string("op_13098")]; fp16 var_13099_to_fp16 = const()[name = string("op_13099_to_fp16"), val = fp16(0x1.6ap-4)]; tensor attn_weights_145_cast_fp16 = mul(x = var_13098, y = var_13099_to_fp16)[name = string("attn_weights_145_cast_fp16")]; tensor attn_weights_147_cast_fp16 = add(x = attn_weights_145_cast_fp16, y = causal_mask)[name = string("attn_weights_147_cast_fp16")]; int32 var_13134 = const()[name = string("op_13134"), val = int32(-1)]; tensor attn_weights_149_cast_fp16 = softmax(axis = var_13134, x = attn_weights_147_cast_fp16)[name = string("attn_weights_149_cast_fp16")]; bool attn_output_241_transpose_x_0 = const()[name = string("attn_output_241_transpose_x_0"), val = bool(false)]; bool attn_output_241_transpose_y_0 = const()[name = string("attn_output_241_transpose_y_0"), val = bool(false)]; tensor attn_output_241_cast_fp16 = matmul(transpose_x = attn_output_241_transpose_x_0, transpose_y = attn_output_241_transpose_y_0, x = attn_weights_149_cast_fp16, y = value_states_147_cast_fp16)[name = string("attn_output_241_cast_fp16")]; tensor var_13145_perm_0 = const()[name = string("op_13145_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_13149 = const()[name = string("op_13149"), val = tensor([1, 1, 2048])]; tensor var_13145_cast_fp16 = transpose(perm = var_13145_perm_0, x = attn_output_241_cast_fp16)[name = string("transpose_22")]; tensor attn_output_245_cast_fp16 = reshape(shape = var_13149, x = var_13145_cast_fp16)[name = string("attn_output_245_cast_fp16")]; tensor var_13154 = const()[name = string("op_13154"), val = tensor([0, 2, 1])]; string var_13170_pad_type_0 = const()[name = string("op_13170_pad_type_0"), val = string("valid")]; int32 var_13170_groups_0 = const()[name = string("op_13170_groups_0"), val = int32(1)]; tensor var_13170_strides_0 = const()[name = string("op_13170_strides_0"), val = tensor([1])]; tensor var_13170_pad_0 = const()[name = string("op_13170_pad_0"), val = tensor([0, 0])]; tensor var_13170_dilations_0 = const()[name = string("op_13170_dilations_0"), val = tensor([1])]; tensor squeeze_24_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1087224704))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1090370496))))[name = string("squeeze_24_cast_fp16_to_fp32_to_fp16_palettized")]; tensor var_13155_cast_fp16 = transpose(perm = var_13154, x = attn_output_245_cast_fp16)[name = string("transpose_21")]; tensor var_13170_cast_fp16 = conv(dilations = var_13170_dilations_0, groups = var_13170_groups_0, pad = var_13170_pad_0, pad_type = var_13170_pad_type_0, strides = var_13170_strides_0, weight = squeeze_24_cast_fp16_to_fp32_to_fp16_palettized, x = var_13155_cast_fp16)[name = string("op_13170_cast_fp16")]; tensor var_13174 = const()[name = string("op_13174"), val = tensor([0, 2, 1])]; tensor attn_output_249_cast_fp16 = transpose(perm = var_13174, x = var_13170_cast_fp16)[name = string("transpose_20")]; tensor hidden_states_149_cast_fp16 = add(x = hidden_states_145_cast_fp16, y = attn_output_249_cast_fp16)[name = string("hidden_states_149_cast_fp16")]; tensor mean_199_axes_0 = const()[name = string("mean_199_axes_0"), val = tensor([-1])]; bool mean_199_keep_dims_0 = const()[name = string("mean_199_keep_dims_0"), val = bool(true)]; tensor mean_199_cast_fp16 = reduce_mean(axes = mean_199_axes_0, keep_dims = mean_199_keep_dims_0, x = hidden_states_149_cast_fp16)[name = string("mean_199_cast_fp16")]; tensor input_443_cast_fp16 = sub(x = hidden_states_149_cast_fp16, y = mean_199_cast_fp16)[name = string("input_443_cast_fp16")]; tensor var_13193_axes_0 = const()[name = string("op_13193_axes_0"), val = tensor([-1])]; tensor model_model_layers_24_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_24_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1090403328)))]; fp16 var_13181_to_fp16 = const()[name = string("op_13181_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_13193_cast_fp16 = layer_norm(axes = var_13193_axes_0, epsilon = var_13181_to_fp16, gamma = model_model_layers_24_post_attention_layernorm_weight_to_fp16, x = input_443_cast_fp16)[name = string("op_13193_cast_fp16")]; tensor var_13207 = const()[name = string("op_13207"), val = tensor([0, 2, 1])]; tensor input_445_axes_0 = const()[name = string("input_445_axes_0"), val = tensor([2])]; tensor var_13208 = transpose(perm = var_13207, x = var_13193_cast_fp16)[name = string("transpose_19")]; tensor input_445 = expand_dims(axes = input_445_axes_0, x = var_13208)[name = string("input_445")]; string input_447_pad_type_0 = const()[name = string("input_447_pad_type_0"), val = string("valid")]; tensor input_447_strides_0 = const()[name = string("input_447_strides_0"), val = tensor([1, 1])]; tensor input_447_pad_0 = const()[name = string("input_447_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_447_dilations_0 = const()[name = string("input_447_dilations_0"), val = tensor([1, 1])]; int32 input_447_groups_0 = const()[name = string("input_447_groups_0"), val = int32(1)]; tensor input_447 = conv(dilations = input_447_dilations_0, groups = input_447_groups_0, pad = input_447_pad_0, pad_type = input_447_pad_type_0, strides = input_447_strides_0, weight = model_model_layers_24_mlp_gate_proj_weight_palettized, x = input_445)[name = string("input_447")]; string b_49_pad_type_0 = const()[name = string("b_49_pad_type_0"), val = string("valid")]; tensor b_49_strides_0 = const()[name = string("b_49_strides_0"), val = tensor([1, 1])]; tensor b_49_pad_0 = const()[name = string("b_49_pad_0"), val = tensor([0, 0, 0, 0])]; tensor b_49_dilations_0 = const()[name = string("b_49_dilations_0"), val = tensor([1, 1])]; int32 b_49_groups_0 = const()[name = string("b_49_groups_0"), val = int32(1)]; tensor b_49 = conv(dilations = b_49_dilations_0, groups = b_49_groups_0, pad = b_49_pad_0, pad_type = b_49_pad_type_0, strides = b_49_strides_0, weight = model_model_layers_24_mlp_up_proj_weight_palettized, x = input_445)[name = string("b_49")]; tensor c_49 = silu(x = input_447)[name = string("c_49")]; tensor input_449 = mul(x = c_49, y = b_49)[name = string("input_449")]; string e_49_pad_type_0 = const()[name = string("e_49_pad_type_0"), val = string("valid")]; tensor e_49_strides_0 = const()[name = string("e_49_strides_0"), val = tensor([1, 1])]; tensor e_49_pad_0 = const()[name = string("e_49_pad_0"), val = tensor([0, 0, 0, 0])]; tensor e_49_dilations_0 = const()[name = string("e_49_dilations_0"), val = tensor([1, 1])]; int32 e_49_groups_0 = const()[name = string("e_49_groups_0"), val = int32(1)]; tensor e_49 = conv(dilations = e_49_dilations_0, groups = e_49_groups_0, pad = e_49_pad_0, pad_type = e_49_pad_type_0, strides = e_49_strides_0, weight = model_model_layers_24_mlp_down_proj_weight_palettized, x = input_449)[name = string("e_49")]; tensor var_13230_axes_0 = const()[name = string("op_13230_axes_0"), val = tensor([2])]; tensor var_13230 = squeeze(axes = var_13230_axes_0, x = e_49)[name = string("op_13230")]; tensor var_13231 = const()[name = string("op_13231"), val = tensor([0, 2, 1])]; tensor var_13232 = transpose(perm = var_13231, x = var_13230)[name = string("transpose_18")]; tensor hidden_states_151_cast_fp16 = add(x = hidden_states_149_cast_fp16, y = var_13232)[name = string("hidden_states_151_cast_fp16")]; tensor mean_201_axes_0 = const()[name = string("mean_201_axes_0"), val = tensor([-1])]; bool mean_201_keep_dims_0 = const()[name = string("mean_201_keep_dims_0"), val = bool(true)]; tensor mean_201_cast_fp16 = reduce_mean(axes = mean_201_axes_0, keep_dims = mean_201_keep_dims_0, x = hidden_states_151_cast_fp16)[name = string("mean_201_cast_fp16")]; tensor input_451_cast_fp16 = sub(x = hidden_states_151_cast_fp16, y = mean_201_cast_fp16)[name = string("input_451_cast_fp16")]; tensor var_13250_axes_0 = const()[name = string("op_13250_axes_0"), val = tensor([-1])]; tensor model_model_layers_25_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_25_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1090407488)))]; fp16 var_13238_to_fp16 = const()[name = string("op_13238_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_13250_cast_fp16 = layer_norm(axes = var_13250_axes_0, epsilon = var_13238_to_fp16, gamma = model_model_layers_25_input_layernorm_weight_to_fp16, x = input_451_cast_fp16)[name = string("op_13250_cast_fp16")]; tensor var_13256 = const()[name = string("op_13256"), val = tensor([0, 2, 1])]; tensor var_13259_axes_0 = const()[name = string("op_13259_axes_0"), val = tensor([2])]; tensor var_13257 = transpose(perm = var_13256, x = var_13250_cast_fp16)[name = string("transpose_17")]; tensor var_13259 = expand_dims(axes = var_13259_axes_0, x = var_13257)[name = string("op_13259")]; string var_13275_pad_type_0 = const()[name = string("op_13275_pad_type_0"), val = string("valid")]; tensor var_13275_strides_0 = const()[name = string("op_13275_strides_0"), val = tensor([1, 1])]; tensor var_13275_pad_0 = const()[name = string("op_13275_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_13275_dilations_0 = const()[name = string("op_13275_dilations_0"), val = tensor([1, 1])]; int32 var_13275_groups_0 = const()[name = string("op_13275_groups_0"), val = int32(1)]; tensor var_13275 = conv(dilations = var_13275_dilations_0, groups = var_13275_groups_0, pad = var_13275_pad_0, pad_type = var_13275_pad_type_0, strides = var_13275_strides_0, weight = model_model_layers_25_self_attn_q_proj_weight_palettized, x = var_13259)[name = string("op_13275")]; tensor var_13280 = const()[name = string("op_13280"), val = tensor([1, 16, 1, 128])]; tensor var_13281 = reshape(shape = var_13280, x = var_13275)[name = string("op_13281")]; string var_13297_pad_type_0 = const()[name = string("op_13297_pad_type_0"), val = string("valid")]; tensor var_13297_strides_0 = const()[name = string("op_13297_strides_0"), val = tensor([1, 1])]; tensor var_13297_pad_0 = const()[name = string("op_13297_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_13297_dilations_0 = const()[name = string("op_13297_dilations_0"), val = tensor([1, 1])]; int32 var_13297_groups_0 = const()[name = string("op_13297_groups_0"), val = int32(1)]; tensor var_13297 = conv(dilations = var_13297_dilations_0, groups = var_13297_groups_0, pad = var_13297_pad_0, pad_type = var_13297_pad_type_0, strides = var_13297_strides_0, weight = model_model_layers_25_self_attn_k_proj_weight_palettized, x = var_13259)[name = string("op_13297")]; tensor var_13302 = const()[name = string("op_13302"), val = tensor([1, 8, 1, 128])]; tensor var_13303 = reshape(shape = var_13302, x = var_13297)[name = string("op_13303")]; string var_13319_pad_type_0 = const()[name = string("op_13319_pad_type_0"), val = string("valid")]; tensor var_13319_strides_0 = const()[name = string("op_13319_strides_0"), val = tensor([1, 1])]; tensor var_13319_pad_0 = const()[name = string("op_13319_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_13319_dilations_0 = const()[name = string("op_13319_dilations_0"), val = tensor([1, 1])]; int32 var_13319_groups_0 = const()[name = string("op_13319_groups_0"), val = int32(1)]; tensor var_13319 = conv(dilations = var_13319_dilations_0, groups = var_13319_groups_0, pad = var_13319_pad_0, pad_type = var_13319_pad_type_0, strides = var_13319_strides_0, weight = model_model_layers_25_self_attn_v_proj_weight_palettized, x = var_13259)[name = string("op_13319")]; tensor var_13324 = const()[name = string("op_13324"), val = tensor([1, 8, 1, 128])]; tensor var_13325 = reshape(shape = var_13324, x = var_13319)[name = string("op_13325")]; tensor mean_203_axes_0 = const()[name = string("mean_203_axes_0"), val = tensor([-1])]; bool mean_203_keep_dims_0 = const()[name = string("mean_203_keep_dims_0"), val = bool(true)]; tensor mean_203 = reduce_mean(axes = mean_203_axes_0, keep_dims = mean_203_keep_dims_0, x = var_13281)[name = string("mean_203")]; tensor input_455 = sub(x = var_13281, y = mean_203)[name = string("input_455")]; tensor var_13346_axes_0 = const()[name = string("op_13346_axes_0"), val = tensor([-1])]; tensor model_model_layers_25_self_attn_q_norm_weight_to_fp16 = const()[name = string("model_model_layers_25_self_attn_q_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1090411648)))]; fp16 var_13334_to_fp16 = const()[name = string("op_13334_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_13346_cast_fp16 = layer_norm(axes = var_13346_axes_0, epsilon = var_13334_to_fp16, gamma = model_model_layers_25_self_attn_q_norm_weight_to_fp16, x = input_455)[name = string("op_13346_cast_fp16")]; tensor mean_205_axes_0 = const()[name = string("mean_205_axes_0"), val = tensor([-1])]; bool mean_205_keep_dims_0 = const()[name = string("mean_205_keep_dims_0"), val = bool(true)]; tensor mean_205 = reduce_mean(axes = mean_205_axes_0, keep_dims = mean_205_keep_dims_0, x = var_13303)[name = string("mean_205")]; tensor input_457 = sub(x = var_13303, y = mean_205)[name = string("input_457")]; tensor var_13364_axes_0 = const()[name = string("op_13364_axes_0"), val = tensor([-1])]; tensor model_model_layers_25_self_attn_k_norm_weight_to_fp16 = const()[name = string("model_model_layers_25_self_attn_k_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1090411968)))]; fp16 var_13352_to_fp16 = const()[name = string("op_13352_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_13364_cast_fp16 = layer_norm(axes = var_13364_axes_0, epsilon = var_13352_to_fp16, gamma = model_model_layers_25_self_attn_k_norm_weight_to_fp16, x = input_457)[name = string("op_13364_cast_fp16")]; tensor var_13367 = mul(x = var_13346_cast_fp16, y = cos_1_cast_fp16)[name = string("op_13367")]; tensor x1_101_begin_0 = const()[name = string("x1_101_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_101_end_0 = const()[name = string("x1_101_end_0"), val = tensor([1, 16, 1, 64])]; tensor x1_101_end_mask_0 = const()[name = string("x1_101_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_101 = slice_by_index(begin = x1_101_begin_0, end = x1_101_end_0, end_mask = x1_101_end_mask_0, x = var_13346_cast_fp16)[name = string("x1_101")]; tensor x2_101_begin_0 = const()[name = string("x2_101_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_101_end_0 = const()[name = string("x2_101_end_0"), val = tensor([1, 16, 1, 128])]; tensor x2_101_end_mask_0 = const()[name = string("x2_101_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_101 = slice_by_index(begin = x2_101_begin_0, end = x2_101_end_0, end_mask = x2_101_end_mask_0, x = var_13346_cast_fp16)[name = string("x2_101")]; fp16 const_455_promoted = const()[name = string("const_455_promoted"), val = fp16(-0x1p+0)]; tensor var_13388 = mul(x = x2_101, y = const_455_promoted)[name = string("op_13388")]; int32 var_13390 = const()[name = string("op_13390"), val = int32(-1)]; bool var_13391_interleave_0 = const()[name = string("op_13391_interleave_0"), val = bool(false)]; tensor var_13391 = concat(axis = var_13390, interleave = var_13391_interleave_0, values = (var_13388, x1_101))[name = string("op_13391")]; tensor var_13392 = mul(x = var_13391, y = sin_1_cast_fp16)[name = string("op_13392")]; tensor query_states_101 = add(x = var_13367, y = var_13392)[name = string("query_states_101")]; tensor var_13395 = mul(x = var_13364_cast_fp16, y = cos_1_cast_fp16)[name = string("op_13395")]; tensor x1_103_begin_0 = const()[name = string("x1_103_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_103_end_0 = const()[name = string("x1_103_end_0"), val = tensor([1, 8, 1, 64])]; tensor x1_103_end_mask_0 = const()[name = string("x1_103_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_103 = slice_by_index(begin = x1_103_begin_0, end = x1_103_end_0, end_mask = x1_103_end_mask_0, x = var_13364_cast_fp16)[name = string("x1_103")]; tensor x2_103_begin_0 = const()[name = string("x2_103_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_103_end_0 = const()[name = string("x2_103_end_0"), val = tensor([1, 8, 1, 128])]; tensor x2_103_end_mask_0 = const()[name = string("x2_103_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_103 = slice_by_index(begin = x2_103_begin_0, end = x2_103_end_0, end_mask = x2_103_end_mask_0, x = var_13364_cast_fp16)[name = string("x2_103")]; fp16 const_458_promoted = const()[name = string("const_458_promoted"), val = fp16(-0x1p+0)]; tensor var_13416 = mul(x = x2_103, y = const_458_promoted)[name = string("op_13416")]; int32 var_13418 = const()[name = string("op_13418"), val = int32(-1)]; bool var_13419_interleave_0 = const()[name = string("op_13419_interleave_0"), val = bool(false)]; tensor var_13419 = concat(axis = var_13418, interleave = var_13419_interleave_0, values = (var_13416, x1_103))[name = string("op_13419")]; tensor var_13420 = mul(x = var_13419, y = sin_1_cast_fp16)[name = string("op_13420")]; tensor key_states_101 = add(x = var_13395, y = var_13420)[name = string("key_states_101")]; tensor expand_dims_300 = const()[name = string("expand_dims_300"), val = tensor([25])]; tensor expand_dims_301 = const()[name = string("expand_dims_301"), val = tensor([0])]; tensor expand_dims_303 = const()[name = string("expand_dims_303"), val = tensor([0])]; tensor expand_dims_304 = const()[name = string("expand_dims_304"), val = tensor([26])]; int32 concat_202_axis_0 = const()[name = string("concat_202_axis_0"), val = int32(0)]; bool concat_202_interleave_0 = const()[name = string("concat_202_interleave_0"), val = bool(false)]; tensor concat_202 = concat(axis = concat_202_axis_0, interleave = concat_202_interleave_0, values = (expand_dims_300, expand_dims_301, current_pos, expand_dims_303))[name = string("concat_202")]; tensor concat_203_values1_0 = const()[name = string("concat_203_values1_0"), val = tensor([0])]; tensor concat_203_values3_0 = const()[name = string("concat_203_values3_0"), val = tensor([0])]; int32 concat_203_axis_0 = const()[name = string("concat_203_axis_0"), val = int32(0)]; bool concat_203_interleave_0 = const()[name = string("concat_203_interleave_0"), val = bool(false)]; tensor concat_203 = concat(axis = concat_203_axis_0, interleave = concat_203_interleave_0, values = (expand_dims_304, concat_203_values1_0, var_1725, concat_203_values3_0))[name = string("concat_203")]; tensor model_model_kv_cache_0_internal_tensor_assign_51_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_51_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_51_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_51_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_51_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_51_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_51_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_51_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_51_cast_fp16 = slice_update(begin = concat_202, begin_mask = model_model_kv_cache_0_internal_tensor_assign_51_begin_mask_0, end = concat_203, end_mask = model_model_kv_cache_0_internal_tensor_assign_51_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_51_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_51_stride_0, update = key_states_101, x = coreml_update_state_105)[name = string("model_model_kv_cache_0_internal_tensor_assign_51_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_51_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_50_write_state")]; tensor coreml_update_state_106 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_50")]; tensor expand_dims_306 = const()[name = string("expand_dims_306"), val = tensor([53])]; tensor expand_dims_307 = const()[name = string("expand_dims_307"), val = tensor([0])]; tensor expand_dims_309 = const()[name = string("expand_dims_309"), val = tensor([0])]; tensor expand_dims_310 = const()[name = string("expand_dims_310"), val = tensor([54])]; int32 concat_206_axis_0 = const()[name = string("concat_206_axis_0"), val = int32(0)]; bool concat_206_interleave_0 = const()[name = string("concat_206_interleave_0"), val = bool(false)]; tensor concat_206 = concat(axis = concat_206_axis_0, interleave = concat_206_interleave_0, values = (expand_dims_306, expand_dims_307, current_pos, expand_dims_309))[name = string("concat_206")]; tensor concat_207_values1_0 = const()[name = string("concat_207_values1_0"), val = tensor([0])]; tensor concat_207_values3_0 = const()[name = string("concat_207_values3_0"), val = tensor([0])]; int32 concat_207_axis_0 = const()[name = string("concat_207_axis_0"), val = int32(0)]; bool concat_207_interleave_0 = const()[name = string("concat_207_interleave_0"), val = bool(false)]; tensor concat_207 = concat(axis = concat_207_axis_0, interleave = concat_207_interleave_0, values = (expand_dims_310, concat_207_values1_0, var_1725, concat_207_values3_0))[name = string("concat_207")]; tensor model_model_kv_cache_0_internal_tensor_assign_52_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_52_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_52_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_52_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_52_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_52_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_52_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_52_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_52_cast_fp16 = slice_update(begin = concat_206, begin_mask = model_model_kv_cache_0_internal_tensor_assign_52_begin_mask_0, end = concat_207, end_mask = model_model_kv_cache_0_internal_tensor_assign_52_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_52_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_52_stride_0, update = var_13325, x = coreml_update_state_106)[name = string("model_model_kv_cache_0_internal_tensor_assign_52_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_52_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_51_write_state")]; tensor coreml_update_state_107 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_51")]; tensor var_13475_begin_0 = const()[name = string("op_13475_begin_0"), val = tensor([25, 0, 0, 0])]; tensor var_13475_end_0 = const()[name = string("op_13475_end_0"), val = tensor([26, 8, 1024, 128])]; tensor var_13475_end_mask_0 = const()[name = string("op_13475_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_13475_cast_fp16 = slice_by_index(begin = var_13475_begin_0, end = var_13475_end_0, end_mask = var_13475_end_mask_0, x = coreml_update_state_107)[name = string("op_13475_cast_fp16")]; tensor K_layer_cache_51_axes_0 = const()[name = string("K_layer_cache_51_axes_0"), val = tensor([0])]; tensor K_layer_cache_51_cast_fp16 = squeeze(axes = K_layer_cache_51_axes_0, x = var_13475_cast_fp16)[name = string("K_layer_cache_51_cast_fp16")]; tensor var_13482_begin_0 = const()[name = string("op_13482_begin_0"), val = tensor([53, 0, 0, 0])]; tensor var_13482_end_0 = const()[name = string("op_13482_end_0"), val = tensor([54, 8, 1024, 128])]; tensor var_13482_end_mask_0 = const()[name = string("op_13482_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_13482_cast_fp16 = slice_by_index(begin = var_13482_begin_0, end = var_13482_end_0, end_mask = var_13482_end_mask_0, x = coreml_update_state_107)[name = string("op_13482_cast_fp16")]; tensor V_layer_cache_51_axes_0 = const()[name = string("V_layer_cache_51_axes_0"), val = tensor([0])]; tensor V_layer_cache_51_cast_fp16 = squeeze(axes = V_layer_cache_51_axes_0, x = var_13482_cast_fp16)[name = string("V_layer_cache_51_cast_fp16")]; tensor x_507_axes_0 = const()[name = string("x_507_axes_0"), val = tensor([1])]; tensor x_507_cast_fp16 = expand_dims(axes = x_507_axes_0, x = K_layer_cache_51_cast_fp16)[name = string("x_507_cast_fp16")]; tensor var_13519 = const()[name = string("op_13519"), val = tensor([1, 2, 1, 1])]; tensor x_509_cast_fp16 = tile(reps = var_13519, x = x_507_cast_fp16)[name = string("x_509_cast_fp16")]; tensor var_13531 = const()[name = string("op_13531"), val = tensor([1, -1, 1024, 128])]; tensor key_states_103_cast_fp16 = reshape(shape = var_13531, x = x_509_cast_fp16)[name = string("key_states_103_cast_fp16")]; tensor x_513_axes_0 = const()[name = string("x_513_axes_0"), val = tensor([1])]; tensor x_513_cast_fp16 = expand_dims(axes = x_513_axes_0, x = V_layer_cache_51_cast_fp16)[name = string("x_513_cast_fp16")]; tensor var_13539 = const()[name = string("op_13539"), val = tensor([1, 2, 1, 1])]; tensor x_515_cast_fp16 = tile(reps = var_13539, x = x_513_cast_fp16)[name = string("x_515_cast_fp16")]; tensor var_13551 = const()[name = string("op_13551"), val = tensor([1, -1, 1024, 128])]; tensor value_states_153_cast_fp16 = reshape(shape = var_13551, x = x_515_cast_fp16)[name = string("value_states_153_cast_fp16")]; bool var_13566_transpose_x_1 = const()[name = string("op_13566_transpose_x_1"), val = bool(false)]; bool var_13566_transpose_y_1 = const()[name = string("op_13566_transpose_y_1"), val = bool(true)]; tensor var_13566 = matmul(transpose_x = var_13566_transpose_x_1, transpose_y = var_13566_transpose_y_1, x = query_states_101, y = key_states_103_cast_fp16)[name = string("op_13566")]; fp16 var_13567_to_fp16 = const()[name = string("op_13567_to_fp16"), val = fp16(0x1.6ap-4)]; tensor attn_weights_151_cast_fp16 = mul(x = var_13566, y = var_13567_to_fp16)[name = string("attn_weights_151_cast_fp16")]; tensor attn_weights_153_cast_fp16 = add(x = attn_weights_151_cast_fp16, y = causal_mask)[name = string("attn_weights_153_cast_fp16")]; int32 var_13602 = const()[name = string("op_13602"), val = int32(-1)]; tensor attn_weights_155_cast_fp16 = softmax(axis = var_13602, x = attn_weights_153_cast_fp16)[name = string("attn_weights_155_cast_fp16")]; bool attn_output_251_transpose_x_0 = const()[name = string("attn_output_251_transpose_x_0"), val = bool(false)]; bool attn_output_251_transpose_y_0 = const()[name = string("attn_output_251_transpose_y_0"), val = bool(false)]; tensor attn_output_251_cast_fp16 = matmul(transpose_x = attn_output_251_transpose_x_0, transpose_y = attn_output_251_transpose_y_0, x = attn_weights_155_cast_fp16, y = value_states_153_cast_fp16)[name = string("attn_output_251_cast_fp16")]; tensor var_13613_perm_0 = const()[name = string("op_13613_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_13617 = const()[name = string("op_13617"), val = tensor([1, 1, 2048])]; tensor var_13613_cast_fp16 = transpose(perm = var_13613_perm_0, x = attn_output_251_cast_fp16)[name = string("transpose_16")]; tensor attn_output_255_cast_fp16 = reshape(shape = var_13617, x = var_13613_cast_fp16)[name = string("attn_output_255_cast_fp16")]; tensor var_13622 = const()[name = string("op_13622"), val = tensor([0, 2, 1])]; string var_13638_pad_type_0 = const()[name = string("op_13638_pad_type_0"), val = string("valid")]; int32 var_13638_groups_0 = const()[name = string("op_13638_groups_0"), val = int32(1)]; tensor var_13638_strides_0 = const()[name = string("op_13638_strides_0"), val = tensor([1])]; tensor var_13638_pad_0 = const()[name = string("op_13638_pad_0"), val = tensor([0, 0])]; tensor var_13638_dilations_0 = const()[name = string("op_13638_dilations_0"), val = tensor([1])]; tensor squeeze_25_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1090412288))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1093558080))))[name = string("squeeze_25_cast_fp16_to_fp32_to_fp16_palettized")]; tensor var_13623_cast_fp16 = transpose(perm = var_13622, x = attn_output_255_cast_fp16)[name = string("transpose_15")]; tensor var_13638_cast_fp16 = conv(dilations = var_13638_dilations_0, groups = var_13638_groups_0, pad = var_13638_pad_0, pad_type = var_13638_pad_type_0, strides = var_13638_strides_0, weight = squeeze_25_cast_fp16_to_fp32_to_fp16_palettized, x = var_13623_cast_fp16)[name = string("op_13638_cast_fp16")]; tensor var_13642 = const()[name = string("op_13642"), val = tensor([0, 2, 1])]; tensor attn_output_259_cast_fp16 = transpose(perm = var_13642, x = var_13638_cast_fp16)[name = string("transpose_14")]; tensor hidden_states_155_cast_fp16 = add(x = hidden_states_151_cast_fp16, y = attn_output_259_cast_fp16)[name = string("hidden_states_155_cast_fp16")]; tensor mean_207_axes_0 = const()[name = string("mean_207_axes_0"), val = tensor([-1])]; bool mean_207_keep_dims_0 = const()[name = string("mean_207_keep_dims_0"), val = bool(true)]; tensor mean_207_cast_fp16 = reduce_mean(axes = mean_207_axes_0, keep_dims = mean_207_keep_dims_0, x = hidden_states_155_cast_fp16)[name = string("mean_207_cast_fp16")]; tensor input_461_cast_fp16 = sub(x = hidden_states_155_cast_fp16, y = mean_207_cast_fp16)[name = string("input_461_cast_fp16")]; tensor var_13661_axes_0 = const()[name = string("op_13661_axes_0"), val = tensor([-1])]; tensor model_model_layers_25_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_25_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1093590912)))]; fp16 var_13649_to_fp16 = const()[name = string("op_13649_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_13661_cast_fp16 = layer_norm(axes = var_13661_axes_0, epsilon = var_13649_to_fp16, gamma = model_model_layers_25_post_attention_layernorm_weight_to_fp16, x = input_461_cast_fp16)[name = string("op_13661_cast_fp16")]; tensor var_13675 = const()[name = string("op_13675"), val = tensor([0, 2, 1])]; tensor input_463_axes_0 = const()[name = string("input_463_axes_0"), val = tensor([2])]; tensor var_13676 = transpose(perm = var_13675, x = var_13661_cast_fp16)[name = string("transpose_13")]; tensor input_463 = expand_dims(axes = input_463_axes_0, x = var_13676)[name = string("input_463")]; string input_465_pad_type_0 = const()[name = string("input_465_pad_type_0"), val = string("valid")]; tensor input_465_strides_0 = const()[name = string("input_465_strides_0"), val = tensor([1, 1])]; tensor input_465_pad_0 = const()[name = string("input_465_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_465_dilations_0 = const()[name = string("input_465_dilations_0"), val = tensor([1, 1])]; int32 input_465_groups_0 = const()[name = string("input_465_groups_0"), val = int32(1)]; tensor input_465 = conv(dilations = input_465_dilations_0, groups = input_465_groups_0, pad = input_465_pad_0, pad_type = input_465_pad_type_0, strides = input_465_strides_0, weight = model_model_layers_25_mlp_gate_proj_weight_palettized, x = input_463)[name = string("input_465")]; string b_51_pad_type_0 = const()[name = string("b_51_pad_type_0"), val = string("valid")]; tensor b_51_strides_0 = const()[name = string("b_51_strides_0"), val = tensor([1, 1])]; tensor b_51_pad_0 = const()[name = string("b_51_pad_0"), val = tensor([0, 0, 0, 0])]; tensor b_51_dilations_0 = const()[name = string("b_51_dilations_0"), val = tensor([1, 1])]; int32 b_51_groups_0 = const()[name = string("b_51_groups_0"), val = int32(1)]; tensor b_51 = conv(dilations = b_51_dilations_0, groups = b_51_groups_0, pad = b_51_pad_0, pad_type = b_51_pad_type_0, strides = b_51_strides_0, weight = model_model_layers_25_mlp_up_proj_weight_palettized, x = input_463)[name = string("b_51")]; tensor c_51 = silu(x = input_465)[name = string("c_51")]; tensor input_467 = mul(x = c_51, y = b_51)[name = string("input_467")]; string e_51_pad_type_0 = const()[name = string("e_51_pad_type_0"), val = string("valid")]; tensor e_51_strides_0 = const()[name = string("e_51_strides_0"), val = tensor([1, 1])]; tensor e_51_pad_0 = const()[name = string("e_51_pad_0"), val = tensor([0, 0, 0, 0])]; tensor e_51_dilations_0 = const()[name = string("e_51_dilations_0"), val = tensor([1, 1])]; int32 e_51_groups_0 = const()[name = string("e_51_groups_0"), val = int32(1)]; tensor e_51 = conv(dilations = e_51_dilations_0, groups = e_51_groups_0, pad = e_51_pad_0, pad_type = e_51_pad_type_0, strides = e_51_strides_0, weight = model_model_layers_25_mlp_down_proj_weight_palettized, x = input_467)[name = string("e_51")]; tensor var_13698_axes_0 = const()[name = string("op_13698_axes_0"), val = tensor([2])]; tensor var_13698 = squeeze(axes = var_13698_axes_0, x = e_51)[name = string("op_13698")]; tensor var_13699 = const()[name = string("op_13699"), val = tensor([0, 2, 1])]; tensor var_13700 = transpose(perm = var_13699, x = var_13698)[name = string("transpose_12")]; tensor hidden_states_157_cast_fp16 = add(x = hidden_states_155_cast_fp16, y = var_13700)[name = string("hidden_states_157_cast_fp16")]; tensor mean_209_axes_0 = const()[name = string("mean_209_axes_0"), val = tensor([-1])]; bool mean_209_keep_dims_0 = const()[name = string("mean_209_keep_dims_0"), val = bool(true)]; tensor mean_209_cast_fp16 = reduce_mean(axes = mean_209_axes_0, keep_dims = mean_209_keep_dims_0, x = hidden_states_157_cast_fp16)[name = string("mean_209_cast_fp16")]; tensor input_469_cast_fp16 = sub(x = hidden_states_157_cast_fp16, y = mean_209_cast_fp16)[name = string("input_469_cast_fp16")]; tensor var_13718_axes_0 = const()[name = string("op_13718_axes_0"), val = tensor([-1])]; tensor model_model_layers_26_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_26_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1093595072)))]; fp16 var_13706_to_fp16 = const()[name = string("op_13706_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_13718_cast_fp16 = layer_norm(axes = var_13718_axes_0, epsilon = var_13706_to_fp16, gamma = model_model_layers_26_input_layernorm_weight_to_fp16, x = input_469_cast_fp16)[name = string("op_13718_cast_fp16")]; tensor var_13724 = const()[name = string("op_13724"), val = tensor([0, 2, 1])]; tensor var_13727_axes_0 = const()[name = string("op_13727_axes_0"), val = tensor([2])]; tensor var_13725 = transpose(perm = var_13724, x = var_13718_cast_fp16)[name = string("transpose_11")]; tensor var_13727 = expand_dims(axes = var_13727_axes_0, x = var_13725)[name = string("op_13727")]; string var_13743_pad_type_0 = const()[name = string("op_13743_pad_type_0"), val = string("valid")]; tensor var_13743_strides_0 = const()[name = string("op_13743_strides_0"), val = tensor([1, 1])]; tensor var_13743_pad_0 = const()[name = string("op_13743_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_13743_dilations_0 = const()[name = string("op_13743_dilations_0"), val = tensor([1, 1])]; int32 var_13743_groups_0 = const()[name = string("op_13743_groups_0"), val = int32(1)]; tensor var_13743 = conv(dilations = var_13743_dilations_0, groups = var_13743_groups_0, pad = var_13743_pad_0, pad_type = var_13743_pad_type_0, strides = var_13743_strides_0, weight = model_model_layers_26_self_attn_q_proj_weight_palettized, x = var_13727)[name = string("op_13743")]; tensor var_13748 = const()[name = string("op_13748"), val = tensor([1, 16, 1, 128])]; tensor var_13749 = reshape(shape = var_13748, x = var_13743)[name = string("op_13749")]; string var_13765_pad_type_0 = const()[name = string("op_13765_pad_type_0"), val = string("valid")]; tensor var_13765_strides_0 = const()[name = string("op_13765_strides_0"), val = tensor([1, 1])]; tensor var_13765_pad_0 = const()[name = string("op_13765_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_13765_dilations_0 = const()[name = string("op_13765_dilations_0"), val = tensor([1, 1])]; int32 var_13765_groups_0 = const()[name = string("op_13765_groups_0"), val = int32(1)]; tensor var_13765 = conv(dilations = var_13765_dilations_0, groups = var_13765_groups_0, pad = var_13765_pad_0, pad_type = var_13765_pad_type_0, strides = var_13765_strides_0, weight = model_model_layers_26_self_attn_k_proj_weight_palettized, x = var_13727)[name = string("op_13765")]; tensor var_13770 = const()[name = string("op_13770"), val = tensor([1, 8, 1, 128])]; tensor var_13771 = reshape(shape = var_13770, x = var_13765)[name = string("op_13771")]; string var_13787_pad_type_0 = const()[name = string("op_13787_pad_type_0"), val = string("valid")]; tensor var_13787_strides_0 = const()[name = string("op_13787_strides_0"), val = tensor([1, 1])]; tensor var_13787_pad_0 = const()[name = string("op_13787_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_13787_dilations_0 = const()[name = string("op_13787_dilations_0"), val = tensor([1, 1])]; int32 var_13787_groups_0 = const()[name = string("op_13787_groups_0"), val = int32(1)]; tensor var_13787 = conv(dilations = var_13787_dilations_0, groups = var_13787_groups_0, pad = var_13787_pad_0, pad_type = var_13787_pad_type_0, strides = var_13787_strides_0, weight = model_model_layers_26_self_attn_v_proj_weight_palettized, x = var_13727)[name = string("op_13787")]; tensor var_13792 = const()[name = string("op_13792"), val = tensor([1, 8, 1, 128])]; tensor var_13793 = reshape(shape = var_13792, x = var_13787)[name = string("op_13793")]; tensor mean_211_axes_0 = const()[name = string("mean_211_axes_0"), val = tensor([-1])]; bool mean_211_keep_dims_0 = const()[name = string("mean_211_keep_dims_0"), val = bool(true)]; tensor mean_211 = reduce_mean(axes = mean_211_axes_0, keep_dims = mean_211_keep_dims_0, x = var_13749)[name = string("mean_211")]; tensor input_473 = sub(x = var_13749, y = mean_211)[name = string("input_473")]; tensor var_13814_axes_0 = const()[name = string("op_13814_axes_0"), val = tensor([-1])]; tensor model_model_layers_26_self_attn_q_norm_weight_to_fp16 = const()[name = string("model_model_layers_26_self_attn_q_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1093599232)))]; fp16 var_13802_to_fp16 = const()[name = string("op_13802_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_13814_cast_fp16 = layer_norm(axes = var_13814_axes_0, epsilon = var_13802_to_fp16, gamma = model_model_layers_26_self_attn_q_norm_weight_to_fp16, x = input_473)[name = string("op_13814_cast_fp16")]; tensor mean_213_axes_0 = const()[name = string("mean_213_axes_0"), val = tensor([-1])]; bool mean_213_keep_dims_0 = const()[name = string("mean_213_keep_dims_0"), val = bool(true)]; tensor mean_213 = reduce_mean(axes = mean_213_axes_0, keep_dims = mean_213_keep_dims_0, x = var_13771)[name = string("mean_213")]; tensor input_475 = sub(x = var_13771, y = mean_213)[name = string("input_475")]; tensor var_13832_axes_0 = const()[name = string("op_13832_axes_0"), val = tensor([-1])]; tensor model_model_layers_26_self_attn_k_norm_weight_to_fp16 = const()[name = string("model_model_layers_26_self_attn_k_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1093599552)))]; fp16 var_13820_to_fp16 = const()[name = string("op_13820_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_13832_cast_fp16 = layer_norm(axes = var_13832_axes_0, epsilon = var_13820_to_fp16, gamma = model_model_layers_26_self_attn_k_norm_weight_to_fp16, x = input_475)[name = string("op_13832_cast_fp16")]; tensor var_13835 = mul(x = var_13814_cast_fp16, y = cos_1_cast_fp16)[name = string("op_13835")]; tensor x1_105_begin_0 = const()[name = string("x1_105_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_105_end_0 = const()[name = string("x1_105_end_0"), val = tensor([1, 16, 1, 64])]; tensor x1_105_end_mask_0 = const()[name = string("x1_105_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_105 = slice_by_index(begin = x1_105_begin_0, end = x1_105_end_0, end_mask = x1_105_end_mask_0, x = var_13814_cast_fp16)[name = string("x1_105")]; tensor x2_105_begin_0 = const()[name = string("x2_105_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_105_end_0 = const()[name = string("x2_105_end_0"), val = tensor([1, 16, 1, 128])]; tensor x2_105_end_mask_0 = const()[name = string("x2_105_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_105 = slice_by_index(begin = x2_105_begin_0, end = x2_105_end_0, end_mask = x2_105_end_mask_0, x = var_13814_cast_fp16)[name = string("x2_105")]; fp16 const_473_promoted = const()[name = string("const_473_promoted"), val = fp16(-0x1p+0)]; tensor var_13856 = mul(x = x2_105, y = const_473_promoted)[name = string("op_13856")]; int32 var_13858 = const()[name = string("op_13858"), val = int32(-1)]; bool var_13859_interleave_0 = const()[name = string("op_13859_interleave_0"), val = bool(false)]; tensor var_13859 = concat(axis = var_13858, interleave = var_13859_interleave_0, values = (var_13856, x1_105))[name = string("op_13859")]; tensor var_13860 = mul(x = var_13859, y = sin_1_cast_fp16)[name = string("op_13860")]; tensor query_states_105 = add(x = var_13835, y = var_13860)[name = string("query_states_105")]; tensor var_13863 = mul(x = var_13832_cast_fp16, y = cos_1_cast_fp16)[name = string("op_13863")]; tensor x1_107_begin_0 = const()[name = string("x1_107_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_107_end_0 = const()[name = string("x1_107_end_0"), val = tensor([1, 8, 1, 64])]; tensor x1_107_end_mask_0 = const()[name = string("x1_107_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_107 = slice_by_index(begin = x1_107_begin_0, end = x1_107_end_0, end_mask = x1_107_end_mask_0, x = var_13832_cast_fp16)[name = string("x1_107")]; tensor x2_107_begin_0 = const()[name = string("x2_107_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_107_end_0 = const()[name = string("x2_107_end_0"), val = tensor([1, 8, 1, 128])]; tensor x2_107_end_mask_0 = const()[name = string("x2_107_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_107 = slice_by_index(begin = x2_107_begin_0, end = x2_107_end_0, end_mask = x2_107_end_mask_0, x = var_13832_cast_fp16)[name = string("x2_107")]; fp16 const_476_promoted = const()[name = string("const_476_promoted"), val = fp16(-0x1p+0)]; tensor var_13884 = mul(x = x2_107, y = const_476_promoted)[name = string("op_13884")]; int32 var_13886 = const()[name = string("op_13886"), val = int32(-1)]; bool var_13887_interleave_0 = const()[name = string("op_13887_interleave_0"), val = bool(false)]; tensor var_13887 = concat(axis = var_13886, interleave = var_13887_interleave_0, values = (var_13884, x1_107))[name = string("op_13887")]; tensor var_13888 = mul(x = var_13887, y = sin_1_cast_fp16)[name = string("op_13888")]; tensor key_states_105 = add(x = var_13863, y = var_13888)[name = string("key_states_105")]; tensor expand_dims_312 = const()[name = string("expand_dims_312"), val = tensor([26])]; tensor expand_dims_313 = const()[name = string("expand_dims_313"), val = tensor([0])]; tensor expand_dims_315 = const()[name = string("expand_dims_315"), val = tensor([0])]; tensor expand_dims_316 = const()[name = string("expand_dims_316"), val = tensor([27])]; int32 concat_210_axis_0 = const()[name = string("concat_210_axis_0"), val = int32(0)]; bool concat_210_interleave_0 = const()[name = string("concat_210_interleave_0"), val = bool(false)]; tensor concat_210 = concat(axis = concat_210_axis_0, interleave = concat_210_interleave_0, values = (expand_dims_312, expand_dims_313, current_pos, expand_dims_315))[name = string("concat_210")]; tensor concat_211_values1_0 = const()[name = string("concat_211_values1_0"), val = tensor([0])]; tensor concat_211_values3_0 = const()[name = string("concat_211_values3_0"), val = tensor([0])]; int32 concat_211_axis_0 = const()[name = string("concat_211_axis_0"), val = int32(0)]; bool concat_211_interleave_0 = const()[name = string("concat_211_interleave_0"), val = bool(false)]; tensor concat_211 = concat(axis = concat_211_axis_0, interleave = concat_211_interleave_0, values = (expand_dims_316, concat_211_values1_0, var_1725, concat_211_values3_0))[name = string("concat_211")]; tensor model_model_kv_cache_0_internal_tensor_assign_53_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_53_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_53_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_53_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_53_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_53_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_53_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_53_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_53_cast_fp16 = slice_update(begin = concat_210, begin_mask = model_model_kv_cache_0_internal_tensor_assign_53_begin_mask_0, end = concat_211, end_mask = model_model_kv_cache_0_internal_tensor_assign_53_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_53_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_53_stride_0, update = key_states_105, x = coreml_update_state_107)[name = string("model_model_kv_cache_0_internal_tensor_assign_53_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_53_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_52_write_state")]; tensor coreml_update_state_108 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_52")]; tensor expand_dims_318 = const()[name = string("expand_dims_318"), val = tensor([54])]; tensor expand_dims_319 = const()[name = string("expand_dims_319"), val = tensor([0])]; tensor expand_dims_321 = const()[name = string("expand_dims_321"), val = tensor([0])]; tensor expand_dims_322 = const()[name = string("expand_dims_322"), val = tensor([55])]; int32 concat_214_axis_0 = const()[name = string("concat_214_axis_0"), val = int32(0)]; bool concat_214_interleave_0 = const()[name = string("concat_214_interleave_0"), val = bool(false)]; tensor concat_214 = concat(axis = concat_214_axis_0, interleave = concat_214_interleave_0, values = (expand_dims_318, expand_dims_319, current_pos, expand_dims_321))[name = string("concat_214")]; tensor concat_215_values1_0 = const()[name = string("concat_215_values1_0"), val = tensor([0])]; tensor concat_215_values3_0 = const()[name = string("concat_215_values3_0"), val = tensor([0])]; int32 concat_215_axis_0 = const()[name = string("concat_215_axis_0"), val = int32(0)]; bool concat_215_interleave_0 = const()[name = string("concat_215_interleave_0"), val = bool(false)]; tensor concat_215 = concat(axis = concat_215_axis_0, interleave = concat_215_interleave_0, values = (expand_dims_322, concat_215_values1_0, var_1725, concat_215_values3_0))[name = string("concat_215")]; tensor model_model_kv_cache_0_internal_tensor_assign_54_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_54_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_54_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_54_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_54_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_54_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_54_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_54_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_54_cast_fp16 = slice_update(begin = concat_214, begin_mask = model_model_kv_cache_0_internal_tensor_assign_54_begin_mask_0, end = concat_215, end_mask = model_model_kv_cache_0_internal_tensor_assign_54_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_54_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_54_stride_0, update = var_13793, x = coreml_update_state_108)[name = string("model_model_kv_cache_0_internal_tensor_assign_54_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_54_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_53_write_state")]; tensor coreml_update_state_109 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_53")]; tensor var_13943_begin_0 = const()[name = string("op_13943_begin_0"), val = tensor([26, 0, 0, 0])]; tensor var_13943_end_0 = const()[name = string("op_13943_end_0"), val = tensor([27, 8, 1024, 128])]; tensor var_13943_end_mask_0 = const()[name = string("op_13943_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_13943_cast_fp16 = slice_by_index(begin = var_13943_begin_0, end = var_13943_end_0, end_mask = var_13943_end_mask_0, x = coreml_update_state_109)[name = string("op_13943_cast_fp16")]; tensor K_layer_cache_53_axes_0 = const()[name = string("K_layer_cache_53_axes_0"), val = tensor([0])]; tensor K_layer_cache_53_cast_fp16 = squeeze(axes = K_layer_cache_53_axes_0, x = var_13943_cast_fp16)[name = string("K_layer_cache_53_cast_fp16")]; tensor var_13950_begin_0 = const()[name = string("op_13950_begin_0"), val = tensor([54, 0, 0, 0])]; tensor var_13950_end_0 = const()[name = string("op_13950_end_0"), val = tensor([55, 8, 1024, 128])]; tensor var_13950_end_mask_0 = const()[name = string("op_13950_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_13950_cast_fp16 = slice_by_index(begin = var_13950_begin_0, end = var_13950_end_0, end_mask = var_13950_end_mask_0, x = coreml_update_state_109)[name = string("op_13950_cast_fp16")]; tensor V_layer_cache_53_axes_0 = const()[name = string("V_layer_cache_53_axes_0"), val = tensor([0])]; tensor V_layer_cache_53_cast_fp16 = squeeze(axes = V_layer_cache_53_axes_0, x = var_13950_cast_fp16)[name = string("V_layer_cache_53_cast_fp16")]; tensor x_527_axes_0 = const()[name = string("x_527_axes_0"), val = tensor([1])]; tensor x_527_cast_fp16 = expand_dims(axes = x_527_axes_0, x = K_layer_cache_53_cast_fp16)[name = string("x_527_cast_fp16")]; tensor var_13987 = const()[name = string("op_13987"), val = tensor([1, 2, 1, 1])]; tensor x_529_cast_fp16 = tile(reps = var_13987, x = x_527_cast_fp16)[name = string("x_529_cast_fp16")]; tensor var_13999 = const()[name = string("op_13999"), val = tensor([1, -1, 1024, 128])]; tensor key_states_107_cast_fp16 = reshape(shape = var_13999, x = x_529_cast_fp16)[name = string("key_states_107_cast_fp16")]; tensor x_533_axes_0 = const()[name = string("x_533_axes_0"), val = tensor([1])]; tensor x_533_cast_fp16 = expand_dims(axes = x_533_axes_0, x = V_layer_cache_53_cast_fp16)[name = string("x_533_cast_fp16")]; tensor var_14007 = const()[name = string("op_14007"), val = tensor([1, 2, 1, 1])]; tensor x_535_cast_fp16 = tile(reps = var_14007, x = x_533_cast_fp16)[name = string("x_535_cast_fp16")]; tensor var_14019 = const()[name = string("op_14019"), val = tensor([1, -1, 1024, 128])]; tensor value_states_159_cast_fp16 = reshape(shape = var_14019, x = x_535_cast_fp16)[name = string("value_states_159_cast_fp16")]; bool var_14034_transpose_x_1 = const()[name = string("op_14034_transpose_x_1"), val = bool(false)]; bool var_14034_transpose_y_1 = const()[name = string("op_14034_transpose_y_1"), val = bool(true)]; tensor var_14034 = matmul(transpose_x = var_14034_transpose_x_1, transpose_y = var_14034_transpose_y_1, x = query_states_105, y = key_states_107_cast_fp16)[name = string("op_14034")]; fp16 var_14035_to_fp16 = const()[name = string("op_14035_to_fp16"), val = fp16(0x1.6ap-4)]; tensor attn_weights_157_cast_fp16 = mul(x = var_14034, y = var_14035_to_fp16)[name = string("attn_weights_157_cast_fp16")]; tensor attn_weights_159_cast_fp16 = add(x = attn_weights_157_cast_fp16, y = causal_mask)[name = string("attn_weights_159_cast_fp16")]; int32 var_14070 = const()[name = string("op_14070"), val = int32(-1)]; tensor attn_weights_161_cast_fp16 = softmax(axis = var_14070, x = attn_weights_159_cast_fp16)[name = string("attn_weights_161_cast_fp16")]; bool attn_output_261_transpose_x_0 = const()[name = string("attn_output_261_transpose_x_0"), val = bool(false)]; bool attn_output_261_transpose_y_0 = const()[name = string("attn_output_261_transpose_y_0"), val = bool(false)]; tensor attn_output_261_cast_fp16 = matmul(transpose_x = attn_output_261_transpose_x_0, transpose_y = attn_output_261_transpose_y_0, x = attn_weights_161_cast_fp16, y = value_states_159_cast_fp16)[name = string("attn_output_261_cast_fp16")]; tensor var_14081_perm_0 = const()[name = string("op_14081_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_14085 = const()[name = string("op_14085"), val = tensor([1, 1, 2048])]; tensor var_14081_cast_fp16 = transpose(perm = var_14081_perm_0, x = attn_output_261_cast_fp16)[name = string("transpose_10")]; tensor attn_output_265_cast_fp16 = reshape(shape = var_14085, x = var_14081_cast_fp16)[name = string("attn_output_265_cast_fp16")]; tensor var_14090 = const()[name = string("op_14090"), val = tensor([0, 2, 1])]; string var_14106_pad_type_0 = const()[name = string("op_14106_pad_type_0"), val = string("valid")]; int32 var_14106_groups_0 = const()[name = string("op_14106_groups_0"), val = int32(1)]; tensor var_14106_strides_0 = const()[name = string("op_14106_strides_0"), val = tensor([1])]; tensor var_14106_pad_0 = const()[name = string("op_14106_pad_0"), val = tensor([0, 0])]; tensor var_14106_dilations_0 = const()[name = string("op_14106_dilations_0"), val = tensor([1])]; tensor squeeze_26_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1093599872))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1096745664))))[name = string("squeeze_26_cast_fp16_to_fp32_to_fp16_palettized")]; tensor var_14091_cast_fp16 = transpose(perm = var_14090, x = attn_output_265_cast_fp16)[name = string("transpose_9")]; tensor var_14106_cast_fp16 = conv(dilations = var_14106_dilations_0, groups = var_14106_groups_0, pad = var_14106_pad_0, pad_type = var_14106_pad_type_0, strides = var_14106_strides_0, weight = squeeze_26_cast_fp16_to_fp32_to_fp16_palettized, x = var_14091_cast_fp16)[name = string("op_14106_cast_fp16")]; tensor var_14110 = const()[name = string("op_14110"), val = tensor([0, 2, 1])]; tensor attn_output_269_cast_fp16 = transpose(perm = var_14110, x = var_14106_cast_fp16)[name = string("transpose_8")]; tensor hidden_states_161_cast_fp16 = add(x = hidden_states_157_cast_fp16, y = attn_output_269_cast_fp16)[name = string("hidden_states_161_cast_fp16")]; tensor mean_215_axes_0 = const()[name = string("mean_215_axes_0"), val = tensor([-1])]; bool mean_215_keep_dims_0 = const()[name = string("mean_215_keep_dims_0"), val = bool(true)]; tensor mean_215_cast_fp16 = reduce_mean(axes = mean_215_axes_0, keep_dims = mean_215_keep_dims_0, x = hidden_states_161_cast_fp16)[name = string("mean_215_cast_fp16")]; tensor input_479_cast_fp16 = sub(x = hidden_states_161_cast_fp16, y = mean_215_cast_fp16)[name = string("input_479_cast_fp16")]; tensor var_14129_axes_0 = const()[name = string("op_14129_axes_0"), val = tensor([-1])]; tensor model_model_layers_26_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_26_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1096778496)))]; fp16 var_14117_to_fp16 = const()[name = string("op_14117_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_14129_cast_fp16 = layer_norm(axes = var_14129_axes_0, epsilon = var_14117_to_fp16, gamma = model_model_layers_26_post_attention_layernorm_weight_to_fp16, x = input_479_cast_fp16)[name = string("op_14129_cast_fp16")]; tensor var_14143 = const()[name = string("op_14143"), val = tensor([0, 2, 1])]; tensor input_481_axes_0 = const()[name = string("input_481_axes_0"), val = tensor([2])]; tensor var_14144 = transpose(perm = var_14143, x = var_14129_cast_fp16)[name = string("transpose_7")]; tensor input_481 = expand_dims(axes = input_481_axes_0, x = var_14144)[name = string("input_481")]; string input_483_pad_type_0 = const()[name = string("input_483_pad_type_0"), val = string("valid")]; tensor input_483_strides_0 = const()[name = string("input_483_strides_0"), val = tensor([1, 1])]; tensor input_483_pad_0 = const()[name = string("input_483_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_483_dilations_0 = const()[name = string("input_483_dilations_0"), val = tensor([1, 1])]; int32 input_483_groups_0 = const()[name = string("input_483_groups_0"), val = int32(1)]; tensor input_483 = conv(dilations = input_483_dilations_0, groups = input_483_groups_0, pad = input_483_pad_0, pad_type = input_483_pad_type_0, strides = input_483_strides_0, weight = model_model_layers_26_mlp_gate_proj_weight_palettized, x = input_481)[name = string("input_483")]; string b_53_pad_type_0 = const()[name = string("b_53_pad_type_0"), val = string("valid")]; tensor b_53_strides_0 = const()[name = string("b_53_strides_0"), val = tensor([1, 1])]; tensor b_53_pad_0 = const()[name = string("b_53_pad_0"), val = tensor([0, 0, 0, 0])]; tensor b_53_dilations_0 = const()[name = string("b_53_dilations_0"), val = tensor([1, 1])]; int32 b_53_groups_0 = const()[name = string("b_53_groups_0"), val = int32(1)]; tensor b_53 = conv(dilations = b_53_dilations_0, groups = b_53_groups_0, pad = b_53_pad_0, pad_type = b_53_pad_type_0, strides = b_53_strides_0, weight = model_model_layers_26_mlp_up_proj_weight_palettized, x = input_481)[name = string("b_53")]; tensor c_53 = silu(x = input_483)[name = string("c_53")]; tensor input_485 = mul(x = c_53, y = b_53)[name = string("input_485")]; string e_53_pad_type_0 = const()[name = string("e_53_pad_type_0"), val = string("valid")]; tensor e_53_strides_0 = const()[name = string("e_53_strides_0"), val = tensor([1, 1])]; tensor e_53_pad_0 = const()[name = string("e_53_pad_0"), val = tensor([0, 0, 0, 0])]; tensor e_53_dilations_0 = const()[name = string("e_53_dilations_0"), val = tensor([1, 1])]; int32 e_53_groups_0 = const()[name = string("e_53_groups_0"), val = int32(1)]; tensor e_53 = conv(dilations = e_53_dilations_0, groups = e_53_groups_0, pad = e_53_pad_0, pad_type = e_53_pad_type_0, strides = e_53_strides_0, weight = model_model_layers_26_mlp_down_proj_weight_palettized, x = input_485)[name = string("e_53")]; tensor var_14166_axes_0 = const()[name = string("op_14166_axes_0"), val = tensor([2])]; tensor var_14166 = squeeze(axes = var_14166_axes_0, x = e_53)[name = string("op_14166")]; tensor var_14167 = const()[name = string("op_14167"), val = tensor([0, 2, 1])]; tensor var_14168 = transpose(perm = var_14167, x = var_14166)[name = string("transpose_6")]; tensor hidden_states_163_cast_fp16 = add(x = hidden_states_161_cast_fp16, y = var_14168)[name = string("hidden_states_163_cast_fp16")]; tensor mean_217_axes_0 = const()[name = string("mean_217_axes_0"), val = tensor([-1])]; bool mean_217_keep_dims_0 = const()[name = string("mean_217_keep_dims_0"), val = bool(true)]; tensor mean_217_cast_fp16 = reduce_mean(axes = mean_217_axes_0, keep_dims = mean_217_keep_dims_0, x = hidden_states_163_cast_fp16)[name = string("mean_217_cast_fp16")]; tensor input_487_cast_fp16 = sub(x = hidden_states_163_cast_fp16, y = mean_217_cast_fp16)[name = string("input_487_cast_fp16")]; tensor var_14186_axes_0 = const()[name = string("op_14186_axes_0"), val = tensor([-1])]; tensor model_model_layers_27_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_27_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1096782656)))]; fp16 var_14174_to_fp16 = const()[name = string("op_14174_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_14186_cast_fp16 = layer_norm(axes = var_14186_axes_0, epsilon = var_14174_to_fp16, gamma = model_model_layers_27_input_layernorm_weight_to_fp16, x = input_487_cast_fp16)[name = string("op_14186_cast_fp16")]; tensor var_14192 = const()[name = string("op_14192"), val = tensor([0, 2, 1])]; tensor var_14195_axes_0 = const()[name = string("op_14195_axes_0"), val = tensor([2])]; tensor var_14193 = transpose(perm = var_14192, x = var_14186_cast_fp16)[name = string("transpose_5")]; tensor var_14195 = expand_dims(axes = var_14195_axes_0, x = var_14193)[name = string("op_14195")]; string var_14211_pad_type_0 = const()[name = string("op_14211_pad_type_0"), val = string("valid")]; tensor var_14211_strides_0 = const()[name = string("op_14211_strides_0"), val = tensor([1, 1])]; tensor var_14211_pad_0 = const()[name = string("op_14211_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_14211_dilations_0 = const()[name = string("op_14211_dilations_0"), val = tensor([1, 1])]; int32 var_14211_groups_0 = const()[name = string("op_14211_groups_0"), val = int32(1)]; tensor var_14211 = conv(dilations = var_14211_dilations_0, groups = var_14211_groups_0, pad = var_14211_pad_0, pad_type = var_14211_pad_type_0, strides = var_14211_strides_0, weight = model_model_layers_27_self_attn_q_proj_weight_palettized, x = var_14195)[name = string("op_14211")]; tensor var_14216 = const()[name = string("op_14216"), val = tensor([1, 16, 1, 128])]; tensor var_14217 = reshape(shape = var_14216, x = var_14211)[name = string("op_14217")]; string var_14233_pad_type_0 = const()[name = string("op_14233_pad_type_0"), val = string("valid")]; tensor var_14233_strides_0 = const()[name = string("op_14233_strides_0"), val = tensor([1, 1])]; tensor var_14233_pad_0 = const()[name = string("op_14233_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_14233_dilations_0 = const()[name = string("op_14233_dilations_0"), val = tensor([1, 1])]; int32 var_14233_groups_0 = const()[name = string("op_14233_groups_0"), val = int32(1)]; tensor var_14233 = conv(dilations = var_14233_dilations_0, groups = var_14233_groups_0, pad = var_14233_pad_0, pad_type = var_14233_pad_type_0, strides = var_14233_strides_0, weight = model_model_layers_27_self_attn_k_proj_weight_palettized, x = var_14195)[name = string("op_14233")]; tensor var_14238 = const()[name = string("op_14238"), val = tensor([1, 8, 1, 128])]; tensor var_14239 = reshape(shape = var_14238, x = var_14233)[name = string("op_14239")]; string var_14255_pad_type_0 = const()[name = string("op_14255_pad_type_0"), val = string("valid")]; tensor var_14255_strides_0 = const()[name = string("op_14255_strides_0"), val = tensor([1, 1])]; tensor var_14255_pad_0 = const()[name = string("op_14255_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_14255_dilations_0 = const()[name = string("op_14255_dilations_0"), val = tensor([1, 1])]; int32 var_14255_groups_0 = const()[name = string("op_14255_groups_0"), val = int32(1)]; tensor var_14255 = conv(dilations = var_14255_dilations_0, groups = var_14255_groups_0, pad = var_14255_pad_0, pad_type = var_14255_pad_type_0, strides = var_14255_strides_0, weight = model_model_layers_27_self_attn_v_proj_weight_palettized, x = var_14195)[name = string("op_14255")]; tensor var_14260 = const()[name = string("op_14260"), val = tensor([1, 8, 1, 128])]; tensor var_14261 = reshape(shape = var_14260, x = var_14255)[name = string("op_14261")]; tensor mean_219_axes_0 = const()[name = string("mean_219_axes_0"), val = tensor([-1])]; bool mean_219_keep_dims_0 = const()[name = string("mean_219_keep_dims_0"), val = bool(true)]; tensor mean_219 = reduce_mean(axes = mean_219_axes_0, keep_dims = mean_219_keep_dims_0, x = var_14217)[name = string("mean_219")]; tensor input_491 = sub(x = var_14217, y = mean_219)[name = string("input_491")]; tensor var_14282_axes_0 = const()[name = string("op_14282_axes_0"), val = tensor([-1])]; tensor model_model_layers_27_self_attn_q_norm_weight_to_fp16 = const()[name = string("model_model_layers_27_self_attn_q_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1096786816)))]; fp16 var_14270_to_fp16 = const()[name = string("op_14270_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_14282_cast_fp16 = layer_norm(axes = var_14282_axes_0, epsilon = var_14270_to_fp16, gamma = model_model_layers_27_self_attn_q_norm_weight_to_fp16, x = input_491)[name = string("op_14282_cast_fp16")]; tensor mean_221_axes_0 = const()[name = string("mean_221_axes_0"), val = tensor([-1])]; bool mean_221_keep_dims_0 = const()[name = string("mean_221_keep_dims_0"), val = bool(true)]; tensor mean_221 = reduce_mean(axes = mean_221_axes_0, keep_dims = mean_221_keep_dims_0, x = var_14239)[name = string("mean_221")]; tensor input_493 = sub(x = var_14239, y = mean_221)[name = string("input_493")]; tensor var_14300_axes_0 = const()[name = string("op_14300_axes_0"), val = tensor([-1])]; tensor model_model_layers_27_self_attn_k_norm_weight_to_fp16 = const()[name = string("model_model_layers_27_self_attn_k_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1096787136)))]; fp16 var_14288_to_fp16 = const()[name = string("op_14288_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_14300_cast_fp16 = layer_norm(axes = var_14300_axes_0, epsilon = var_14288_to_fp16, gamma = model_model_layers_27_self_attn_k_norm_weight_to_fp16, x = input_493)[name = string("op_14300_cast_fp16")]; tensor var_14303 = mul(x = var_14282_cast_fp16, y = cos_1_cast_fp16)[name = string("op_14303")]; tensor x1_109_begin_0 = const()[name = string("x1_109_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_109_end_0 = const()[name = string("x1_109_end_0"), val = tensor([1, 16, 1, 64])]; tensor x1_109_end_mask_0 = const()[name = string("x1_109_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_109 = slice_by_index(begin = x1_109_begin_0, end = x1_109_end_0, end_mask = x1_109_end_mask_0, x = var_14282_cast_fp16)[name = string("x1_109")]; tensor x2_109_begin_0 = const()[name = string("x2_109_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_109_end_0 = const()[name = string("x2_109_end_0"), val = tensor([1, 16, 1, 128])]; tensor x2_109_end_mask_0 = const()[name = string("x2_109_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_109 = slice_by_index(begin = x2_109_begin_0, end = x2_109_end_0, end_mask = x2_109_end_mask_0, x = var_14282_cast_fp16)[name = string("x2_109")]; fp16 const_491_promoted = const()[name = string("const_491_promoted"), val = fp16(-0x1p+0)]; tensor var_14324 = mul(x = x2_109, y = const_491_promoted)[name = string("op_14324")]; int32 var_14326 = const()[name = string("op_14326"), val = int32(-1)]; bool var_14327_interleave_0 = const()[name = string("op_14327_interleave_0"), val = bool(false)]; tensor var_14327 = concat(axis = var_14326, interleave = var_14327_interleave_0, values = (var_14324, x1_109))[name = string("op_14327")]; tensor var_14328 = mul(x = var_14327, y = sin_1_cast_fp16)[name = string("op_14328")]; tensor query_states_109 = add(x = var_14303, y = var_14328)[name = string("query_states_109")]; tensor var_14331 = mul(x = var_14300_cast_fp16, y = cos_1_cast_fp16)[name = string("op_14331")]; tensor x1_begin_0 = const()[name = string("x1_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_end_0 = const()[name = string("x1_end_0"), val = tensor([1, 8, 1, 64])]; tensor x1_end_mask_0 = const()[name = string("x1_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1 = slice_by_index(begin = x1_begin_0, end = x1_end_0, end_mask = x1_end_mask_0, x = var_14300_cast_fp16)[name = string("x1")]; tensor x2_begin_0 = const()[name = string("x2_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_end_0 = const()[name = string("x2_end_0"), val = tensor([1, 8, 1, 128])]; tensor x2_end_mask_0 = const()[name = string("x2_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2 = slice_by_index(begin = x2_begin_0, end = x2_end_0, end_mask = x2_end_mask_0, x = var_14300_cast_fp16)[name = string("x2")]; fp16 const_494_promoted = const()[name = string("const_494_promoted"), val = fp16(-0x1p+0)]; tensor var_14352 = mul(x = x2, y = const_494_promoted)[name = string("op_14352")]; int32 var_14354 = const()[name = string("op_14354"), val = int32(-1)]; bool var_14355_interleave_0 = const()[name = string("op_14355_interleave_0"), val = bool(false)]; tensor var_14355 = concat(axis = var_14354, interleave = var_14355_interleave_0, values = (var_14352, x1))[name = string("op_14355")]; tensor var_14356 = mul(x = var_14355, y = sin_1_cast_fp16)[name = string("op_14356")]; tensor key_states_109 = add(x = var_14331, y = var_14356)[name = string("key_states_109")]; tensor expand_dims_324 = const()[name = string("expand_dims_324"), val = tensor([27])]; tensor expand_dims_325 = const()[name = string("expand_dims_325"), val = tensor([0])]; tensor expand_dims_327 = const()[name = string("expand_dims_327"), val = tensor([0])]; tensor expand_dims_328 = const()[name = string("expand_dims_328"), val = tensor([28])]; int32 concat_218_axis_0 = const()[name = string("concat_218_axis_0"), val = int32(0)]; bool concat_218_interleave_0 = const()[name = string("concat_218_interleave_0"), val = bool(false)]; tensor concat_218 = concat(axis = concat_218_axis_0, interleave = concat_218_interleave_0, values = (expand_dims_324, expand_dims_325, current_pos, expand_dims_327))[name = string("concat_218")]; tensor concat_219_values1_0 = const()[name = string("concat_219_values1_0"), val = tensor([0])]; tensor concat_219_values3_0 = const()[name = string("concat_219_values3_0"), val = tensor([0])]; int32 concat_219_axis_0 = const()[name = string("concat_219_axis_0"), val = int32(0)]; bool concat_219_interleave_0 = const()[name = string("concat_219_interleave_0"), val = bool(false)]; tensor concat_219 = concat(axis = concat_219_axis_0, interleave = concat_219_interleave_0, values = (expand_dims_328, concat_219_values1_0, var_1725, concat_219_values3_0))[name = string("concat_219")]; tensor model_model_kv_cache_0_internal_tensor_assign_55_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_55_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_55_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_55_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_55_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_55_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_55_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_55_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_55_cast_fp16 = slice_update(begin = concat_218, begin_mask = model_model_kv_cache_0_internal_tensor_assign_55_begin_mask_0, end = concat_219, end_mask = model_model_kv_cache_0_internal_tensor_assign_55_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_55_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_55_stride_0, update = key_states_109, x = coreml_update_state_109)[name = string("model_model_kv_cache_0_internal_tensor_assign_55_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_55_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_54_write_state")]; tensor coreml_update_state_110 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_54")]; tensor expand_dims_330 = const()[name = string("expand_dims_330"), val = tensor([55])]; tensor expand_dims_331 = const()[name = string("expand_dims_331"), val = tensor([0])]; tensor expand_dims_333 = const()[name = string("expand_dims_333"), val = tensor([0])]; tensor expand_dims_334 = const()[name = string("expand_dims_334"), val = tensor([56])]; int32 concat_222_axis_0 = const()[name = string("concat_222_axis_0"), val = int32(0)]; bool concat_222_interleave_0 = const()[name = string("concat_222_interleave_0"), val = bool(false)]; tensor concat_222 = concat(axis = concat_222_axis_0, interleave = concat_222_interleave_0, values = (expand_dims_330, expand_dims_331, current_pos, expand_dims_333))[name = string("concat_222")]; tensor concat_223_values1_0 = const()[name = string("concat_223_values1_0"), val = tensor([0])]; tensor concat_223_values3_0 = const()[name = string("concat_223_values3_0"), val = tensor([0])]; int32 concat_223_axis_0 = const()[name = string("concat_223_axis_0"), val = int32(0)]; bool concat_223_interleave_0 = const()[name = string("concat_223_interleave_0"), val = bool(false)]; tensor concat_223 = concat(axis = concat_223_axis_0, interleave = concat_223_interleave_0, values = (expand_dims_334, concat_223_values1_0, var_1725, concat_223_values3_0))[name = string("concat_223")]; tensor model_model_kv_cache_0_internal_tensor_assign_56_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_56_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_56_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_56_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_56_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_56_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_56_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_56_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_56_cast_fp16 = slice_update(begin = concat_222, begin_mask = model_model_kv_cache_0_internal_tensor_assign_56_begin_mask_0, end = concat_223, end_mask = model_model_kv_cache_0_internal_tensor_assign_56_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_56_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_56_stride_0, update = var_14261, x = coreml_update_state_110)[name = string("model_model_kv_cache_0_internal_tensor_assign_56_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_56_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_55_write_state")]; tensor coreml_update_state_111 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_55")]; tensor var_14411_begin_0 = const()[name = string("op_14411_begin_0"), val = tensor([27, 0, 0, 0])]; tensor var_14411_end_0 = const()[name = string("op_14411_end_0"), val = tensor([28, 8, 1024, 128])]; tensor var_14411_end_mask_0 = const()[name = string("op_14411_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_14411_cast_fp16 = slice_by_index(begin = var_14411_begin_0, end = var_14411_end_0, end_mask = var_14411_end_mask_0, x = coreml_update_state_111)[name = string("op_14411_cast_fp16")]; tensor K_layer_cache_axes_0 = const()[name = string("K_layer_cache_axes_0"), val = tensor([0])]; tensor K_layer_cache_cast_fp16 = squeeze(axes = K_layer_cache_axes_0, x = var_14411_cast_fp16)[name = string("K_layer_cache_cast_fp16")]; tensor var_14418_begin_0 = const()[name = string("op_14418_begin_0"), val = tensor([55, 0, 0, 0])]; tensor var_14418_end_0 = const()[name = string("op_14418_end_0"), val = tensor([1, 8, 1024, 128])]; tensor var_14418_end_mask_0 = const()[name = string("op_14418_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_14418_cast_fp16 = slice_by_index(begin = var_14418_begin_0, end = var_14418_end_0, end_mask = var_14418_end_mask_0, x = coreml_update_state_111)[name = string("op_14418_cast_fp16")]; tensor V_layer_cache_axes_0 = const()[name = string("V_layer_cache_axes_0"), val = tensor([0])]; tensor V_layer_cache_cast_fp16 = squeeze(axes = V_layer_cache_axes_0, x = var_14418_cast_fp16)[name = string("V_layer_cache_cast_fp16")]; tensor x_547_axes_0 = const()[name = string("x_547_axes_0"), val = tensor([1])]; tensor x_547_cast_fp16 = expand_dims(axes = x_547_axes_0, x = K_layer_cache_cast_fp16)[name = string("x_547_cast_fp16")]; tensor var_14455 = const()[name = string("op_14455"), val = tensor([1, 2, 1, 1])]; tensor x_549_cast_fp16 = tile(reps = var_14455, x = x_547_cast_fp16)[name = string("x_549_cast_fp16")]; tensor var_14467 = const()[name = string("op_14467"), val = tensor([1, -1, 1024, 128])]; tensor key_states_cast_fp16 = reshape(shape = var_14467, x = x_549_cast_fp16)[name = string("key_states_cast_fp16")]; tensor x_553_axes_0 = const()[name = string("x_553_axes_0"), val = tensor([1])]; tensor x_553_cast_fp16 = expand_dims(axes = x_553_axes_0, x = V_layer_cache_cast_fp16)[name = string("x_553_cast_fp16")]; tensor var_14475 = const()[name = string("op_14475"), val = tensor([1, 2, 1, 1])]; tensor x_555_cast_fp16 = tile(reps = var_14475, x = x_553_cast_fp16)[name = string("x_555_cast_fp16")]; tensor var_14487 = const()[name = string("op_14487"), val = tensor([1, -1, 1024, 128])]; tensor value_states_165_cast_fp16 = reshape(shape = var_14487, x = x_555_cast_fp16)[name = string("value_states_165_cast_fp16")]; bool var_14502_transpose_x_1 = const()[name = string("op_14502_transpose_x_1"), val = bool(false)]; bool var_14502_transpose_y_1 = const()[name = string("op_14502_transpose_y_1"), val = bool(true)]; tensor var_14502 = matmul(transpose_x = var_14502_transpose_x_1, transpose_y = var_14502_transpose_y_1, x = query_states_109, y = key_states_cast_fp16)[name = string("op_14502")]; fp16 var_14503_to_fp16 = const()[name = string("op_14503_to_fp16"), val = fp16(0x1.6ap-4)]; tensor attn_weights_163_cast_fp16 = mul(x = var_14502, y = var_14503_to_fp16)[name = string("attn_weights_163_cast_fp16")]; tensor attn_weights_165_cast_fp16 = add(x = attn_weights_163_cast_fp16, y = causal_mask)[name = string("attn_weights_165_cast_fp16")]; int32 var_14538 = const()[name = string("op_14538"), val = int32(-1)]; tensor attn_weights_cast_fp16 = softmax(axis = var_14538, x = attn_weights_165_cast_fp16)[name = string("attn_weights_cast_fp16")]; bool attn_output_271_transpose_x_0 = const()[name = string("attn_output_271_transpose_x_0"), val = bool(false)]; bool attn_output_271_transpose_y_0 = const()[name = string("attn_output_271_transpose_y_0"), val = bool(false)]; tensor attn_output_271_cast_fp16 = matmul(transpose_x = attn_output_271_transpose_x_0, transpose_y = attn_output_271_transpose_y_0, x = attn_weights_cast_fp16, y = value_states_165_cast_fp16)[name = string("attn_output_271_cast_fp16")]; tensor var_14549_perm_0 = const()[name = string("op_14549_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_14553 = const()[name = string("op_14553"), val = tensor([1, 1, 2048])]; tensor var_14549_cast_fp16 = transpose(perm = var_14549_perm_0, x = attn_output_271_cast_fp16)[name = string("transpose_4")]; tensor attn_output_275_cast_fp16 = reshape(shape = var_14553, x = var_14549_cast_fp16)[name = string("attn_output_275_cast_fp16")]; tensor var_14558 = const()[name = string("op_14558"), val = tensor([0, 2, 1])]; string var_14574_pad_type_0 = const()[name = string("op_14574_pad_type_0"), val = string("valid")]; int32 var_14574_groups_0 = const()[name = string("op_14574_groups_0"), val = int32(1)]; tensor var_14574_strides_0 = const()[name = string("op_14574_strides_0"), val = tensor([1])]; tensor var_14574_pad_0 = const()[name = string("op_14574_pad_0"), val = tensor([0, 0])]; tensor var_14574_dilations_0 = const()[name = string("op_14574_dilations_0"), val = tensor([1])]; tensor squeeze_27_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1096787456))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1099933248))))[name = string("squeeze_27_cast_fp16_to_fp32_to_fp16_palettized")]; tensor var_14559_cast_fp16 = transpose(perm = var_14558, x = attn_output_275_cast_fp16)[name = string("transpose_3")]; tensor var_14574_cast_fp16 = conv(dilations = var_14574_dilations_0, groups = var_14574_groups_0, pad = var_14574_pad_0, pad_type = var_14574_pad_type_0, strides = var_14574_strides_0, weight = squeeze_27_cast_fp16_to_fp32_to_fp16_palettized, x = var_14559_cast_fp16)[name = string("op_14574_cast_fp16")]; tensor var_14578 = const()[name = string("op_14578"), val = tensor([0, 2, 1])]; tensor attn_output_cast_fp16 = transpose(perm = var_14578, x = var_14574_cast_fp16)[name = string("transpose_2")]; tensor hidden_states_167_cast_fp16 = add(x = hidden_states_163_cast_fp16, y = attn_output_cast_fp16)[name = string("hidden_states_167_cast_fp16")]; tensor mean_223_axes_0 = const()[name = string("mean_223_axes_0"), val = tensor([-1])]; bool mean_223_keep_dims_0 = const()[name = string("mean_223_keep_dims_0"), val = bool(true)]; tensor mean_223_cast_fp16 = reduce_mean(axes = mean_223_axes_0, keep_dims = mean_223_keep_dims_0, x = hidden_states_167_cast_fp16)[name = string("mean_223_cast_fp16")]; tensor input_497_cast_fp16 = sub(x = hidden_states_167_cast_fp16, y = mean_223_cast_fp16)[name = string("input_497_cast_fp16")]; tensor var_14597_axes_0 = const()[name = string("op_14597_axes_0"), val = tensor([-1])]; tensor model_model_layers_27_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_27_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1099966080)))]; fp16 var_14585_to_fp16 = const()[name = string("op_14585_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_14597_cast_fp16 = layer_norm(axes = var_14597_axes_0, epsilon = var_14585_to_fp16, gamma = model_model_layers_27_post_attention_layernorm_weight_to_fp16, x = input_497_cast_fp16)[name = string("op_14597_cast_fp16")]; tensor var_14611 = const()[name = string("op_14611"), val = tensor([0, 2, 1])]; tensor input_499_axes_0 = const()[name = string("input_499_axes_0"), val = tensor([2])]; tensor var_14612 = transpose(perm = var_14611, x = var_14597_cast_fp16)[name = string("transpose_1")]; tensor input_499 = expand_dims(axes = input_499_axes_0, x = var_14612)[name = string("input_499")]; string input_501_pad_type_0 = const()[name = string("input_501_pad_type_0"), val = string("valid")]; tensor input_501_strides_0 = const()[name = string("input_501_strides_0"), val = tensor([1, 1])]; tensor input_501_pad_0 = const()[name = string("input_501_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_501_dilations_0 = const()[name = string("input_501_dilations_0"), val = tensor([1, 1])]; int32 input_501_groups_0 = const()[name = string("input_501_groups_0"), val = int32(1)]; tensor input_501 = conv(dilations = input_501_dilations_0, groups = input_501_groups_0, pad = input_501_pad_0, pad_type = input_501_pad_type_0, strides = input_501_strides_0, weight = model_model_layers_27_mlp_gate_proj_weight_palettized, x = input_499)[name = string("input_501")]; string b_pad_type_0 = const()[name = string("b_pad_type_0"), val = string("valid")]; tensor b_strides_0 = const()[name = string("b_strides_0"), val = tensor([1, 1])]; tensor b_pad_0 = const()[name = string("b_pad_0"), val = tensor([0, 0, 0, 0])]; tensor b_dilations_0 = const()[name = string("b_dilations_0"), val = tensor([1, 1])]; int32 b_groups_0 = const()[name = string("b_groups_0"), val = int32(1)]; tensor b = conv(dilations = b_dilations_0, groups = b_groups_0, pad = b_pad_0, pad_type = b_pad_type_0, strides = b_strides_0, weight = model_model_layers_27_mlp_up_proj_weight_palettized, x = input_499)[name = string("b")]; tensor c = silu(x = input_501)[name = string("c")]; tensor input_503 = mul(x = c, y = b)[name = string("input_503")]; string e_pad_type_0 = const()[name = string("e_pad_type_0"), val = string("valid")]; tensor e_strides_0 = const()[name = string("e_strides_0"), val = tensor([1, 1])]; tensor e_pad_0 = const()[name = string("e_pad_0"), val = tensor([0, 0, 0, 0])]; tensor e_dilations_0 = const()[name = string("e_dilations_0"), val = tensor([1, 1])]; int32 e_groups_0 = const()[name = string("e_groups_0"), val = int32(1)]; tensor e = conv(dilations = e_dilations_0, groups = e_groups_0, pad = e_pad_0, pad_type = e_pad_type_0, strides = e_strides_0, weight = model_model_layers_27_mlp_down_proj_weight_palettized, x = input_503)[name = string("e")]; tensor var_14634_axes_0 = const()[name = string("op_14634_axes_0"), val = tensor([2])]; tensor var_14634 = squeeze(axes = var_14634_axes_0, x = e)[name = string("op_14634")]; tensor var_14635 = const()[name = string("op_14635"), val = tensor([0, 2, 1])]; tensor var_14636 = transpose(perm = var_14635, x = var_14634)[name = string("transpose_0")]; tensor hidden_states_cast_fp16 = add(x = hidden_states_167_cast_fp16, y = var_14636)[name = string("hidden_states_cast_fp16")]; tensor mean_axes_0 = const()[name = string("mean_axes_0"), val = tensor([-1])]; bool mean_keep_dims_0 = const()[name = string("mean_keep_dims_0"), val = bool(true)]; tensor mean_cast_fp16 = reduce_mean(axes = mean_axes_0, keep_dims = mean_keep_dims_0, x = hidden_states_cast_fp16)[name = string("mean_cast_fp16")]; tensor input_cast_fp16 = sub(x = hidden_states_cast_fp16, y = mean_cast_fp16)[name = string("input_cast_fp16")]; tensor var_14654_axes_0 = const()[name = string("op_14654_axes_0"), val = tensor([-1])]; tensor model_model_norm_weight_to_fp16 = const()[name = string("model_model_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1099970240)))]; fp16 var_14642_to_fp16 = const()[name = string("op_14642_to_fp16"), val = fp16(0x1.1p-20)]; tensor output_hidden_states = layer_norm(axes = var_14654_axes_0, epsilon = var_14642_to_fp16, gamma = model_model_norm_weight_to_fp16, x = input_cast_fp16)[name = string("op_14654_cast_fp16")]; tensor position_ids_tmp = identity(x = position_ids)[name = string("position_ids_tmp")]; } -> (output_hidden_states); func prefill(tensor causal_mask, tensor current_pos, tensor hidden_states, state> model_model_kv_cache_0, tensor position_ids) { tensor model_model_layers_0_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(3145856))))[name = string("model_model_layers_0_self_attn_q_proj_weight_palettized")]; tensor model_model_layers_0_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(3178688))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(4751616))))[name = string("model_model_layers_0_self_attn_k_proj_weight_palettized")]; tensor model_model_layers_0_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(4768064))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(6340992))))[name = string("model_model_layers_0_self_attn_v_proj_weight_palettized")]; tensor model_model_layers_0_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(6357440))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(15794688))))[name = string("model_model_layers_0_mlp_gate_proj_weight_palettized")]; tensor model_model_layers_0_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(15893056))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(25330304))))[name = string("model_model_layers_0_mlp_up_proj_weight_palettized")]; tensor model_model_layers_0_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(25428672))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(34865920))))[name = string("model_model_layers_0_mlp_down_proj_weight_palettized")]; tensor model_model_layers_1_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(34898752))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(38044544))))[name = string("model_model_layers_1_self_attn_q_proj_weight_palettized")]; tensor model_model_layers_1_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(38077376))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(39650304))))[name = string("model_model_layers_1_self_attn_k_proj_weight_palettized")]; tensor model_model_layers_1_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(39666752))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(41239680))))[name = string("model_model_layers_1_self_attn_v_proj_weight_palettized")]; tensor model_model_layers_1_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(41256128))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(50693376))))[name = string("model_model_layers_1_mlp_gate_proj_weight_palettized")]; tensor model_model_layers_1_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(50791744))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(60228992))))[name = string("model_model_layers_1_mlp_up_proj_weight_palettized")]; tensor model_model_layers_1_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(60327360))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(69764608))))[name = string("model_model_layers_1_mlp_down_proj_weight_palettized")]; tensor model_model_layers_2_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(69797440))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(72943232))))[name = string("model_model_layers_2_self_attn_q_proj_weight_palettized")]; tensor model_model_layers_2_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(72976064))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(74548992))))[name = string("model_model_layers_2_self_attn_k_proj_weight_palettized")]; tensor model_model_layers_2_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(74565440))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(76138368))))[name = string("model_model_layers_2_self_attn_v_proj_weight_palettized")]; tensor model_model_layers_2_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(76154816))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(85592064))))[name = string("model_model_layers_2_mlp_gate_proj_weight_palettized")]; tensor model_model_layers_2_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(85690432))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(95127680))))[name = string("model_model_layers_2_mlp_up_proj_weight_palettized")]; tensor model_model_layers_2_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(95226048))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(104663296))))[name = string("model_model_layers_2_mlp_down_proj_weight_palettized")]; tensor model_model_layers_3_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(104696128))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(107841920))))[name = string("model_model_layers_3_self_attn_q_proj_weight_palettized")]; tensor model_model_layers_3_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(107874752))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(109447680))))[name = string("model_model_layers_3_self_attn_k_proj_weight_palettized")]; tensor model_model_layers_3_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(109464128))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(111037056))))[name = string("model_model_layers_3_self_attn_v_proj_weight_palettized")]; tensor model_model_layers_3_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(111053504))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(120490752))))[name = string("model_model_layers_3_mlp_gate_proj_weight_palettized")]; tensor model_model_layers_3_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(120589120))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(130026368))))[name = string("model_model_layers_3_mlp_up_proj_weight_palettized")]; tensor model_model_layers_3_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(130124736))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(139561984))))[name = string("model_model_layers_3_mlp_down_proj_weight_palettized")]; tensor model_model_layers_4_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(139594816))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(142740608))))[name = string("model_model_layers_4_self_attn_q_proj_weight_palettized")]; tensor model_model_layers_4_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(142773440))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(144346368))))[name = string("model_model_layers_4_self_attn_k_proj_weight_palettized")]; tensor model_model_layers_4_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(144362816))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(145935744))))[name = string("model_model_layers_4_self_attn_v_proj_weight_palettized")]; tensor model_model_layers_4_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(145952192))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(155389440))))[name = string("model_model_layers_4_mlp_gate_proj_weight_palettized")]; tensor model_model_layers_4_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(155487808))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(164925056))))[name = string("model_model_layers_4_mlp_up_proj_weight_palettized")]; tensor model_model_layers_4_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(165023424))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(174460672))))[name = string("model_model_layers_4_mlp_down_proj_weight_palettized")]; tensor model_model_layers_5_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(174493504))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(177639296))))[name = string("model_model_layers_5_self_attn_q_proj_weight_palettized")]; tensor model_model_layers_5_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(177672128))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(179245056))))[name = string("model_model_layers_5_self_attn_k_proj_weight_palettized")]; tensor model_model_layers_5_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(179261504))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(180834432))))[name = string("model_model_layers_5_self_attn_v_proj_weight_palettized")]; tensor model_model_layers_5_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(180850880))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(190288128))))[name = string("model_model_layers_5_mlp_gate_proj_weight_palettized")]; tensor model_model_layers_5_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(190386496))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(199823744))))[name = string("model_model_layers_5_mlp_up_proj_weight_palettized")]; tensor model_model_layers_5_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(199922112))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(209359360))))[name = string("model_model_layers_5_mlp_down_proj_weight_palettized")]; tensor model_model_layers_6_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(209392192))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(212537984))))[name = string("model_model_layers_6_self_attn_q_proj_weight_palettized")]; tensor model_model_layers_6_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(212570816))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(214143744))))[name = string("model_model_layers_6_self_attn_k_proj_weight_palettized")]; tensor model_model_layers_6_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(214160192))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(215733120))))[name = string("model_model_layers_6_self_attn_v_proj_weight_palettized")]; tensor model_model_layers_6_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(215749568))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(225186816))))[name = string("model_model_layers_6_mlp_gate_proj_weight_palettized")]; tensor model_model_layers_6_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(225285184))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(234722432))))[name = string("model_model_layers_6_mlp_up_proj_weight_palettized")]; tensor model_model_layers_6_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(234820800))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(244258048))))[name = string("model_model_layers_6_mlp_down_proj_weight_palettized")]; tensor model_model_layers_7_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(244290880))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(247436672))))[name = string("model_model_layers_7_self_attn_q_proj_weight_palettized")]; tensor model_model_layers_7_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(247469504))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(249042432))))[name = string("model_model_layers_7_self_attn_k_proj_weight_palettized")]; tensor model_model_layers_7_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(249058880))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(250631808))))[name = string("model_model_layers_7_self_attn_v_proj_weight_palettized")]; tensor model_model_layers_7_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(250648256))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(260085504))))[name = string("model_model_layers_7_mlp_gate_proj_weight_palettized")]; tensor model_model_layers_7_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(260183872))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(269621120))))[name = string("model_model_layers_7_mlp_up_proj_weight_palettized")]; tensor model_model_layers_7_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(269719488))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(279156736))))[name = string("model_model_layers_7_mlp_down_proj_weight_palettized")]; tensor model_model_layers_8_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(279189568))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(282335360))))[name = string("model_model_layers_8_self_attn_q_proj_weight_palettized")]; tensor model_model_layers_8_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(282368192))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(283941120))))[name = string("model_model_layers_8_self_attn_k_proj_weight_palettized")]; tensor model_model_layers_8_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(283957568))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(285530496))))[name = string("model_model_layers_8_self_attn_v_proj_weight_palettized")]; tensor model_model_layers_8_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(285546944))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(294984192))))[name = string("model_model_layers_8_mlp_gate_proj_weight_palettized")]; tensor model_model_layers_8_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(295082560))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(304519808))))[name = string("model_model_layers_8_mlp_up_proj_weight_palettized")]; tensor model_model_layers_8_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(304618176))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(314055424))))[name = string("model_model_layers_8_mlp_down_proj_weight_palettized")]; tensor model_model_layers_9_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(314088256))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(317234048))))[name = string("model_model_layers_9_self_attn_q_proj_weight_palettized")]; tensor model_model_layers_9_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(317266880))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(318839808))))[name = string("model_model_layers_9_self_attn_k_proj_weight_palettized")]; tensor model_model_layers_9_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(318856256))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(320429184))))[name = string("model_model_layers_9_self_attn_v_proj_weight_palettized")]; tensor model_model_layers_9_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(320445632))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(329882880))))[name = string("model_model_layers_9_mlp_gate_proj_weight_palettized")]; tensor model_model_layers_9_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(329981248))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(339418496))))[name = string("model_model_layers_9_mlp_up_proj_weight_palettized")]; tensor model_model_layers_9_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(339516864))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(348954112))))[name = string("model_model_layers_9_mlp_down_proj_weight_palettized")]; tensor model_model_layers_10_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(348986944))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(352132736))))[name = string("model_model_layers_10_self_attn_q_proj_weight_palettized")]; tensor model_model_layers_10_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(352165568))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(353738496))))[name = string("model_model_layers_10_self_attn_k_proj_weight_palettized")]; tensor model_model_layers_10_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(353754944))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(355327872))))[name = string("model_model_layers_10_self_attn_v_proj_weight_palettized")]; tensor model_model_layers_10_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(355344320))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(364781568))))[name = string("model_model_layers_10_mlp_gate_proj_weight_palettized")]; tensor model_model_layers_10_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(364879936))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(374317184))))[name = string("model_model_layers_10_mlp_up_proj_weight_palettized")]; tensor model_model_layers_10_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(374415552))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(383852800))))[name = string("model_model_layers_10_mlp_down_proj_weight_palettized")]; tensor model_model_layers_11_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(383885632))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(387031424))))[name = string("model_model_layers_11_self_attn_q_proj_weight_palettized")]; tensor model_model_layers_11_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(387064256))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(388637184))))[name = string("model_model_layers_11_self_attn_k_proj_weight_palettized")]; tensor model_model_layers_11_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(388653632))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(390226560))))[name = string("model_model_layers_11_self_attn_v_proj_weight_palettized")]; tensor model_model_layers_11_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(390243008))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(399680256))))[name = string("model_model_layers_11_mlp_gate_proj_weight_palettized")]; tensor model_model_layers_11_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(399778624))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(409215872))))[name = string("model_model_layers_11_mlp_up_proj_weight_palettized")]; tensor model_model_layers_11_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(409314240))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(418751488))))[name = string("model_model_layers_11_mlp_down_proj_weight_palettized")]; tensor model_model_layers_12_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(418784320))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(421930112))))[name = string("model_model_layers_12_self_attn_q_proj_weight_palettized")]; tensor model_model_layers_12_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(421962944))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(423535872))))[name = string("model_model_layers_12_self_attn_k_proj_weight_palettized")]; tensor model_model_layers_12_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(423552320))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(425125248))))[name = string("model_model_layers_12_self_attn_v_proj_weight_palettized")]; tensor model_model_layers_12_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(425141696))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(434578944))))[name = string("model_model_layers_12_mlp_gate_proj_weight_palettized")]; tensor model_model_layers_12_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(434677312))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(444114560))))[name = string("model_model_layers_12_mlp_up_proj_weight_palettized")]; tensor model_model_layers_12_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(444212928))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(453650176))))[name = string("model_model_layers_12_mlp_down_proj_weight_palettized")]; tensor model_model_layers_13_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(453683008))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(456828800))))[name = string("model_model_layers_13_self_attn_q_proj_weight_palettized")]; tensor model_model_layers_13_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(456861632))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(458434560))))[name = string("model_model_layers_13_self_attn_k_proj_weight_palettized")]; tensor model_model_layers_13_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(458451008))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(460023936))))[name = string("model_model_layers_13_self_attn_v_proj_weight_palettized")]; tensor model_model_layers_13_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(460040384))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(469477632))))[name = string("model_model_layers_13_mlp_gate_proj_weight_palettized")]; tensor model_model_layers_13_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(469576000))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(479013248))))[name = string("model_model_layers_13_mlp_up_proj_weight_palettized")]; tensor model_model_layers_13_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(479111616))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(488548864))))[name = string("model_model_layers_13_mlp_down_proj_weight_palettized")]; tensor model_model_layers_14_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(488581696))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(491727488))))[name = string("model_model_layers_14_self_attn_q_proj_weight_palettized")]; tensor model_model_layers_14_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(491760320))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(493333248))))[name = string("model_model_layers_14_self_attn_k_proj_weight_palettized")]; tensor model_model_layers_14_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(493349696))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(494922624))))[name = string("model_model_layers_14_self_attn_v_proj_weight_palettized")]; tensor model_model_layers_14_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(494939072))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(504376320))))[name = string("model_model_layers_14_mlp_gate_proj_weight_palettized")]; tensor model_model_layers_14_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(504474688))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(513911936))))[name = string("model_model_layers_14_mlp_up_proj_weight_palettized")]; tensor model_model_layers_14_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(514010304))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(523447552))))[name = string("model_model_layers_14_mlp_down_proj_weight_palettized")]; tensor model_model_layers_15_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(523480384))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(526626176))))[name = string("model_model_layers_15_self_attn_q_proj_weight_palettized")]; tensor model_model_layers_15_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(526659008))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(528231936))))[name = string("model_model_layers_15_self_attn_k_proj_weight_palettized")]; tensor model_model_layers_15_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(528248384))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(529821312))))[name = string("model_model_layers_15_self_attn_v_proj_weight_palettized")]; tensor model_model_layers_15_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(529837760))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(539275008))))[name = string("model_model_layers_15_mlp_gate_proj_weight_palettized")]; tensor model_model_layers_15_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(539373376))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(548810624))))[name = string("model_model_layers_15_mlp_up_proj_weight_palettized")]; tensor model_model_layers_15_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(548908992))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(558346240))))[name = string("model_model_layers_15_mlp_down_proj_weight_palettized")]; tensor model_model_layers_16_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(558379072))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(561524864))))[name = string("model_model_layers_16_self_attn_q_proj_weight_palettized")]; tensor model_model_layers_16_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(561557696))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(563130624))))[name = string("model_model_layers_16_self_attn_k_proj_weight_palettized")]; tensor model_model_layers_16_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(563147072))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(564720000))))[name = string("model_model_layers_16_self_attn_v_proj_weight_palettized")]; tensor model_model_layers_16_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(564736448))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(574173696))))[name = string("model_model_layers_16_mlp_gate_proj_weight_palettized")]; tensor model_model_layers_16_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(574272064))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(583709312))))[name = string("model_model_layers_16_mlp_up_proj_weight_palettized")]; tensor model_model_layers_16_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(583807680))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(593244928))))[name = string("model_model_layers_16_mlp_down_proj_weight_palettized")]; tensor model_model_layers_17_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(593277760))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(596423552))))[name = string("model_model_layers_17_self_attn_q_proj_weight_palettized")]; tensor model_model_layers_17_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(596456384))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(598029312))))[name = string("model_model_layers_17_self_attn_k_proj_weight_palettized")]; tensor model_model_layers_17_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(598045760))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(599618688))))[name = string("model_model_layers_17_self_attn_v_proj_weight_palettized")]; tensor model_model_layers_17_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(599635136))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(609072384))))[name = string("model_model_layers_17_mlp_gate_proj_weight_palettized")]; tensor model_model_layers_17_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(609170752))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(618608000))))[name = string("model_model_layers_17_mlp_up_proj_weight_palettized")]; tensor model_model_layers_17_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(618706368))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(628143616))))[name = string("model_model_layers_17_mlp_down_proj_weight_palettized")]; tensor model_model_layers_18_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(628176448))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(631322240))))[name = string("model_model_layers_18_self_attn_q_proj_weight_palettized")]; tensor model_model_layers_18_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(631355072))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(632928000))))[name = string("model_model_layers_18_self_attn_k_proj_weight_palettized")]; tensor model_model_layers_18_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(632944448))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(634517376))))[name = string("model_model_layers_18_self_attn_v_proj_weight_palettized")]; tensor model_model_layers_18_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(634533824))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(643971072))))[name = string("model_model_layers_18_mlp_gate_proj_weight_palettized")]; tensor model_model_layers_18_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(644069440))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(653506688))))[name = string("model_model_layers_18_mlp_up_proj_weight_palettized")]; tensor model_model_layers_18_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(653605056))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(663042304))))[name = string("model_model_layers_18_mlp_down_proj_weight_palettized")]; tensor model_model_layers_19_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(663075136))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(666220928))))[name = string("model_model_layers_19_self_attn_q_proj_weight_palettized")]; tensor model_model_layers_19_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(666253760))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(667826688))))[name = string("model_model_layers_19_self_attn_k_proj_weight_palettized")]; tensor model_model_layers_19_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(667843136))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(669416064))))[name = string("model_model_layers_19_self_attn_v_proj_weight_palettized")]; tensor model_model_layers_19_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(669432512))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(678869760))))[name = string("model_model_layers_19_mlp_gate_proj_weight_palettized")]; tensor model_model_layers_19_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(678968128))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(688405376))))[name = string("model_model_layers_19_mlp_up_proj_weight_palettized")]; tensor model_model_layers_19_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(688503744))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(697940992))))[name = string("model_model_layers_19_mlp_down_proj_weight_palettized")]; tensor model_model_layers_20_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(697973824))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(701119616))))[name = string("model_model_layers_20_self_attn_q_proj_weight_palettized")]; tensor model_model_layers_20_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(701152448))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(702725376))))[name = string("model_model_layers_20_self_attn_k_proj_weight_palettized")]; tensor model_model_layers_20_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(702741824))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(704314752))))[name = string("model_model_layers_20_self_attn_v_proj_weight_palettized")]; tensor model_model_layers_20_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(704331200))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(713768448))))[name = string("model_model_layers_20_mlp_gate_proj_weight_palettized")]; tensor model_model_layers_20_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(713866816))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(723304064))))[name = string("model_model_layers_20_mlp_up_proj_weight_palettized")]; tensor model_model_layers_20_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(723402432))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(732839680))))[name = string("model_model_layers_20_mlp_down_proj_weight_palettized")]; tensor model_model_layers_21_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(732872512))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(736018304))))[name = string("model_model_layers_21_self_attn_q_proj_weight_palettized")]; tensor model_model_layers_21_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(736051136))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(737624064))))[name = string("model_model_layers_21_self_attn_k_proj_weight_palettized")]; tensor model_model_layers_21_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(737640512))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(739213440))))[name = string("model_model_layers_21_self_attn_v_proj_weight_palettized")]; tensor model_model_layers_21_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(739229888))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(748667136))))[name = string("model_model_layers_21_mlp_gate_proj_weight_palettized")]; tensor model_model_layers_21_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(748765504))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(758202752))))[name = string("model_model_layers_21_mlp_up_proj_weight_palettized")]; tensor model_model_layers_21_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(758301120))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(767738368))))[name = string("model_model_layers_21_mlp_down_proj_weight_palettized")]; tensor model_model_layers_22_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(767771200))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(770916992))))[name = string("model_model_layers_22_self_attn_q_proj_weight_palettized")]; tensor model_model_layers_22_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(770949824))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(772522752))))[name = string("model_model_layers_22_self_attn_k_proj_weight_palettized")]; tensor model_model_layers_22_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(772539200))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(774112128))))[name = string("model_model_layers_22_self_attn_v_proj_weight_palettized")]; tensor model_model_layers_22_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(774128576))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(783565824))))[name = string("model_model_layers_22_mlp_gate_proj_weight_palettized")]; tensor model_model_layers_22_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(783664192))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(793101440))))[name = string("model_model_layers_22_mlp_up_proj_weight_palettized")]; tensor model_model_layers_22_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(793199808))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(802637056))))[name = string("model_model_layers_22_mlp_down_proj_weight_palettized")]; tensor model_model_layers_23_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(802669888))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(805815680))))[name = string("model_model_layers_23_self_attn_q_proj_weight_palettized")]; tensor model_model_layers_23_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(805848512))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(807421440))))[name = string("model_model_layers_23_self_attn_k_proj_weight_palettized")]; tensor model_model_layers_23_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(807437888))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(809010816))))[name = string("model_model_layers_23_self_attn_v_proj_weight_palettized")]; tensor model_model_layers_23_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(809027264))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(818464512))))[name = string("model_model_layers_23_mlp_gate_proj_weight_palettized")]; tensor model_model_layers_23_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(818562880))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(828000128))))[name = string("model_model_layers_23_mlp_up_proj_weight_palettized")]; tensor model_model_layers_23_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(828098496))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(837535744))))[name = string("model_model_layers_23_mlp_down_proj_weight_palettized")]; tensor model_model_layers_24_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(837568576))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(840714368))))[name = string("model_model_layers_24_self_attn_q_proj_weight_palettized")]; tensor model_model_layers_24_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(840747200))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(842320128))))[name = string("model_model_layers_24_self_attn_k_proj_weight_palettized")]; tensor model_model_layers_24_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(842336576))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(843909504))))[name = string("model_model_layers_24_self_attn_v_proj_weight_palettized")]; tensor model_model_layers_24_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(843925952))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(853363200))))[name = string("model_model_layers_24_mlp_gate_proj_weight_palettized")]; tensor model_model_layers_24_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(853461568))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(862898816))))[name = string("model_model_layers_24_mlp_up_proj_weight_palettized")]; tensor model_model_layers_24_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(862997184))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(872434432))))[name = string("model_model_layers_24_mlp_down_proj_weight_palettized")]; tensor model_model_layers_25_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(872467264))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(875613056))))[name = string("model_model_layers_25_self_attn_q_proj_weight_palettized")]; tensor model_model_layers_25_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(875645888))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(877218816))))[name = string("model_model_layers_25_self_attn_k_proj_weight_palettized")]; tensor model_model_layers_25_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(877235264))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(878808192))))[name = string("model_model_layers_25_self_attn_v_proj_weight_palettized")]; tensor model_model_layers_25_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(878824640))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(888261888))))[name = string("model_model_layers_25_mlp_gate_proj_weight_palettized")]; tensor model_model_layers_25_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(888360256))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(897797504))))[name = string("model_model_layers_25_mlp_up_proj_weight_palettized")]; tensor model_model_layers_25_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(897895872))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(907333120))))[name = string("model_model_layers_25_mlp_down_proj_weight_palettized")]; tensor model_model_layers_26_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(907365952))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(910511744))))[name = string("model_model_layers_26_self_attn_q_proj_weight_palettized")]; tensor model_model_layers_26_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(910544576))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(912117504))))[name = string("model_model_layers_26_self_attn_k_proj_weight_palettized")]; tensor model_model_layers_26_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(912133952))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(913706880))))[name = string("model_model_layers_26_self_attn_v_proj_weight_palettized")]; tensor model_model_layers_26_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(913723328))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(923160576))))[name = string("model_model_layers_26_mlp_gate_proj_weight_palettized")]; tensor model_model_layers_26_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(923258944))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(932696192))))[name = string("model_model_layers_26_mlp_up_proj_weight_palettized")]; tensor model_model_layers_26_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(932794560))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(942231808))))[name = string("model_model_layers_26_mlp_down_proj_weight_palettized")]; tensor model_model_layers_27_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(942264640))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(945410432))))[name = string("model_model_layers_27_self_attn_q_proj_weight_palettized")]; tensor model_model_layers_27_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(945443264))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(947016192))))[name = string("model_model_layers_27_self_attn_k_proj_weight_palettized")]; tensor model_model_layers_27_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(947032640))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(948605568))))[name = string("model_model_layers_27_self_attn_v_proj_weight_palettized")]; tensor model_model_layers_27_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(948622016))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(958059264))))[name = string("model_model_layers_27_mlp_gate_proj_weight_palettized")]; tensor model_model_layers_27_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(958157632))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(967594880))))[name = string("model_model_layers_27_mlp_up_proj_weight_palettized")]; tensor model_model_layers_27_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(967693248))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(977130496))))[name = string("model_model_layers_27_mlp_down_proj_weight_palettized")]; int32 greater_equal_0_y_0 = const()[name = string("greater_equal_0_y_0"), val = int32(0)]; tensor greater_equal_0 = greater_equal(x = position_ids, y = greater_equal_0_y_0)[name = string("greater_equal_0")]; int32 slice_by_index_0 = const()[name = string("slice_by_index_0"), val = int32(65536)]; tensor add_0 = add(x = position_ids, y = slice_by_index_0)[name = string("add_0")]; tensor select_0 = select(a = position_ids, b = add_0, cond = greater_equal_0)[name = string("select_0")]; int32 var_1505_axis_0 = const()[name = string("op_1505_axis_0"), val = int32(1)]; int32 var_1505_batch_dims_0 = const()[name = string("op_1505_batch_dims_0"), val = int32(0)]; bool var_1505_validate_indices_0 = const()[name = string("op_1505_validate_indices_0"), val = bool(false)]; tensor var_1497_to_fp16 = const()[name = string("op_1497_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(993940608)))]; tensor var_1505_cast_fp16 = gather(axis = var_1505_axis_0, batch_dims = var_1505_batch_dims_0, indices = select_0, validate_indices = var_1505_validate_indices_0, x = var_1497_to_fp16)[name = string("op_1505_cast_fp16")]; tensor var_1509 = const()[name = string("op_1509"), val = tensor([1, 64, 1, 128])]; tensor cos_1_cast_fp16 = reshape(shape = var_1509, x = var_1505_cast_fp16)[name = string("cos_1_cast_fp16")]; int32 var_1519_axis_0 = const()[name = string("op_1519_axis_0"), val = int32(1)]; int32 var_1519_batch_dims_0 = const()[name = string("op_1519_batch_dims_0"), val = int32(0)]; bool var_1519_validate_indices_0 = const()[name = string("op_1519_validate_indices_0"), val = bool(false)]; tensor var_1511_to_fp16 = const()[name = string("op_1511_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(977163328)))]; tensor var_1519_cast_fp16 = gather(axis = var_1519_axis_0, batch_dims = var_1519_batch_dims_0, indices = select_0, validate_indices = var_1519_validate_indices_0, x = var_1511_to_fp16)[name = string("op_1519_cast_fp16")]; tensor var_1523 = const()[name = string("op_1523"), val = tensor([1, 64, 1, 128])]; tensor sin_1_cast_fp16 = reshape(shape = var_1523, x = var_1519_cast_fp16)[name = string("sin_1_cast_fp16")]; tensor mean_1_axes_0 = const()[name = string("mean_1_axes_0"), val = tensor([-1])]; bool mean_1_keep_dims_0 = const()[name = string("mean_1_keep_dims_0"), val = bool(true)]; tensor mean_1_cast_fp16 = reduce_mean(axes = mean_1_axes_0, keep_dims = mean_1_keep_dims_0, x = hidden_states)[name = string("mean_1_cast_fp16")]; tensor input_1_cast_fp16 = sub(x = hidden_states, y = mean_1_cast_fp16)[name = string("input_1_cast_fp16")]; tensor var_1550_axes_0 = const()[name = string("op_1550_axes_0"), val = tensor([-1])]; tensor model_model_layers_0_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_0_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1010717888)))]; fp16 var_1538_to_fp16 = const()[name = string("op_1538_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_1550_cast_fp16 = layer_norm(axes = var_1550_axes_0, epsilon = var_1538_to_fp16, gamma = model_model_layers_0_input_layernorm_weight_to_fp16, x = input_1_cast_fp16)[name = string("op_1550_cast_fp16")]; tensor var_1562 = const()[name = string("op_1562"), val = tensor([0, 2, 1])]; tensor var_1565_axes_0 = const()[name = string("op_1565_axes_0"), val = tensor([2])]; tensor var_1563 = transpose(perm = var_1562, x = var_1550_cast_fp16)[name = string("transpose_253")]; tensor var_1565 = expand_dims(axes = var_1565_axes_0, x = var_1563)[name = string("op_1565")]; string query_states_1_pad_type_0 = const()[name = string("query_states_1_pad_type_0"), val = string("valid")]; tensor query_states_1_strides_0 = const()[name = string("query_states_1_strides_0"), val = tensor([1, 1])]; tensor query_states_1_pad_0 = const()[name = string("query_states_1_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_1_dilations_0 = const()[name = string("query_states_1_dilations_0"), val = tensor([1, 1])]; int32 query_states_1_groups_0 = const()[name = string("query_states_1_groups_0"), val = int32(1)]; tensor query_states_1 = conv(dilations = query_states_1_dilations_0, groups = query_states_1_groups_0, pad = query_states_1_pad_0, pad_type = query_states_1_pad_type_0, strides = query_states_1_strides_0, weight = model_model_layers_0_self_attn_q_proj_weight_palettized, x = var_1565)[name = string("query_states_1")]; string key_states_1_pad_type_0 = const()[name = string("key_states_1_pad_type_0"), val = string("valid")]; tensor key_states_1_strides_0 = const()[name = string("key_states_1_strides_0"), val = tensor([1, 1])]; tensor key_states_1_pad_0 = const()[name = string("key_states_1_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_1_dilations_0 = const()[name = string("key_states_1_dilations_0"), val = tensor([1, 1])]; int32 key_states_1_groups_0 = const()[name = string("key_states_1_groups_0"), val = int32(1)]; tensor key_states_1 = conv(dilations = key_states_1_dilations_0, groups = key_states_1_groups_0, pad = key_states_1_pad_0, pad_type = key_states_1_pad_type_0, strides = key_states_1_strides_0, weight = model_model_layers_0_self_attn_k_proj_weight_palettized, x = var_1565)[name = string("key_states_1")]; string value_states_1_pad_type_0 = const()[name = string("value_states_1_pad_type_0"), val = string("valid")]; tensor value_states_1_strides_0 = const()[name = string("value_states_1_strides_0"), val = tensor([1, 1])]; tensor value_states_1_pad_0 = const()[name = string("value_states_1_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_1_dilations_0 = const()[name = string("value_states_1_dilations_0"), val = tensor([1, 1])]; int32 value_states_1_groups_0 = const()[name = string("value_states_1_groups_0"), val = int32(1)]; tensor value_states_1 = conv(dilations = value_states_1_dilations_0, groups = value_states_1_groups_0, pad = value_states_1_pad_0, pad_type = value_states_1_pad_type_0, strides = value_states_1_strides_0, weight = model_model_layers_0_self_attn_v_proj_weight_palettized, x = var_1565)[name = string("value_states_1")]; tensor var_1607 = const()[name = string("op_1607"), val = tensor([1, 16, 128, 64])]; tensor var_1608 = reshape(shape = var_1607, x = query_states_1)[name = string("op_1608")]; tensor var_1613 = const()[name = string("op_1613"), val = tensor([0, 1, 3, 2])]; tensor var_1618 = const()[name = string("op_1618"), val = tensor([1, 8, 128, 64])]; tensor var_1619 = reshape(shape = var_1618, x = key_states_1)[name = string("op_1619")]; tensor var_1624 = const()[name = string("op_1624"), val = tensor([0, 1, 3, 2])]; tensor var_1629 = const()[name = string("op_1629"), val = tensor([1, 8, 128, 64])]; tensor var_1630 = reshape(shape = var_1629, x = value_states_1)[name = string("op_1630")]; tensor var_1635 = const()[name = string("op_1635"), val = tensor([0, 1, 3, 2])]; tensor mean_3_axes_0 = const()[name = string("mean_3_axes_0"), val = tensor([-1])]; bool mean_3_keep_dims_0 = const()[name = string("mean_3_keep_dims_0"), val = bool(true)]; tensor x_1 = transpose(perm = var_1613, x = var_1608)[name = string("transpose_252")]; tensor mean_3 = reduce_mean(axes = mean_3_axes_0, keep_dims = mean_3_keep_dims_0, x = x_1)[name = string("mean_3")]; tensor input_5 = sub(x = x_1, y = mean_3)[name = string("input_5")]; tensor var_1652_axes_0 = const()[name = string("op_1652_axes_0"), val = tensor([-1])]; tensor model_model_layers_0_self_attn_q_norm_weight_to_fp16 = const()[name = string("model_model_layers_0_self_attn_q_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1010722048)))]; fp16 var_1640_to_fp16 = const()[name = string("op_1640_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_1652_cast_fp16 = layer_norm(axes = var_1652_axes_0, epsilon = var_1640_to_fp16, gamma = model_model_layers_0_self_attn_q_norm_weight_to_fp16, x = input_5)[name = string("op_1652_cast_fp16")]; tensor mean_5_axes_0 = const()[name = string("mean_5_axes_0"), val = tensor([-1])]; bool mean_5_keep_dims_0 = const()[name = string("mean_5_keep_dims_0"), val = bool(true)]; tensor x_3 = transpose(perm = var_1624, x = var_1619)[name = string("transpose_251")]; tensor mean_5 = reduce_mean(axes = mean_5_axes_0, keep_dims = mean_5_keep_dims_0, x = x_3)[name = string("mean_5")]; tensor input_7 = sub(x = x_3, y = mean_5)[name = string("input_7")]; tensor var_1670_axes_0 = const()[name = string("op_1670_axes_0"), val = tensor([-1])]; tensor model_model_layers_0_self_attn_k_norm_weight_to_fp16 = const()[name = string("model_model_layers_0_self_attn_k_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1010722368)))]; fp16 var_1658_to_fp16 = const()[name = string("op_1658_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_1670_cast_fp16 = layer_norm(axes = var_1670_axes_0, epsilon = var_1658_to_fp16, gamma = model_model_layers_0_self_attn_k_norm_weight_to_fp16, x = input_7)[name = string("op_1670_cast_fp16")]; tensor var_1677 = const()[name = string("op_1677"), val = tensor([0, 2, 1, 3])]; tensor var_1683 = const()[name = string("op_1683"), val = tensor([0, 2, 1, 3])]; tensor cos_5 = transpose(perm = var_1677, x = cos_1_cast_fp16)[name = string("transpose_250")]; tensor var_1685 = mul(x = var_1652_cast_fp16, y = cos_5)[name = string("op_1685")]; tensor x1_1_begin_0 = const()[name = string("x1_1_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_1_end_0 = const()[name = string("x1_1_end_0"), val = tensor([1, 16, 64, 64])]; tensor x1_1_end_mask_0 = const()[name = string("x1_1_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_1 = slice_by_index(begin = x1_1_begin_0, end = x1_1_end_0, end_mask = x1_1_end_mask_0, x = var_1652_cast_fp16)[name = string("x1_1")]; tensor x2_1_begin_0 = const()[name = string("x2_1_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_1_end_0 = const()[name = string("x2_1_end_0"), val = tensor([1, 16, 64, 128])]; tensor x2_1_end_mask_0 = const()[name = string("x2_1_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_1 = slice_by_index(begin = x2_1_begin_0, end = x2_1_end_0, end_mask = x2_1_end_mask_0, x = var_1652_cast_fp16)[name = string("x2_1")]; fp16 const_7_promoted = const()[name = string("const_7_promoted"), val = fp16(-0x1p+0)]; tensor var_1706 = mul(x = x2_1, y = const_7_promoted)[name = string("op_1706")]; int32 var_1708 = const()[name = string("op_1708"), val = int32(-1)]; bool var_1709_interleave_0 = const()[name = string("op_1709_interleave_0"), val = bool(false)]; tensor var_1709 = concat(axis = var_1708, interleave = var_1709_interleave_0, values = (var_1706, x1_1))[name = string("op_1709")]; tensor sin_5 = transpose(perm = var_1683, x = sin_1_cast_fp16)[name = string("transpose_249")]; tensor var_1710 = mul(x = var_1709, y = sin_5)[name = string("op_1710")]; tensor query_states_3 = add(x = var_1685, y = var_1710)[name = string("query_states_3")]; tensor var_1713 = mul(x = var_1670_cast_fp16, y = cos_5)[name = string("op_1713")]; tensor x1_3_begin_0 = const()[name = string("x1_3_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_3_end_0 = const()[name = string("x1_3_end_0"), val = tensor([1, 8, 64, 64])]; tensor x1_3_end_mask_0 = const()[name = string("x1_3_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_3 = slice_by_index(begin = x1_3_begin_0, end = x1_3_end_0, end_mask = x1_3_end_mask_0, x = var_1670_cast_fp16)[name = string("x1_3")]; tensor x2_3_begin_0 = const()[name = string("x2_3_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_3_end_0 = const()[name = string("x2_3_end_0"), val = tensor([1, 8, 64, 128])]; tensor x2_3_end_mask_0 = const()[name = string("x2_3_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_3 = slice_by_index(begin = x2_3_begin_0, end = x2_3_end_0, end_mask = x2_3_end_mask_0, x = var_1670_cast_fp16)[name = string("x2_3")]; fp16 const_10_promoted = const()[name = string("const_10_promoted"), val = fp16(-0x1p+0)]; tensor var_1734 = mul(x = x2_3, y = const_10_promoted)[name = string("op_1734")]; int32 var_1736 = const()[name = string("op_1736"), val = int32(-1)]; bool var_1737_interleave_0 = const()[name = string("op_1737_interleave_0"), val = bool(false)]; tensor var_1737 = concat(axis = var_1736, interleave = var_1737_interleave_0, values = (var_1734, x1_3))[name = string("op_1737")]; tensor var_1738 = mul(x = var_1737, y = sin_5)[name = string("op_1738")]; tensor key_states_3 = add(x = var_1713, y = var_1738)[name = string("key_states_3")]; tensor seq_length_1 = const()[name = string("seq_length_1"), val = tensor([64])]; tensor var_1760 = add(x = current_pos, y = seq_length_1)[name = string("op_1760")]; tensor read_state_0 = read_state(input = model_model_kv_cache_0)[name = string("read_state_0")]; tensor expand_dims_0 = const()[name = string("expand_dims_0"), val = tensor([0])]; tensor expand_dims_1 = const()[name = string("expand_dims_1"), val = tensor([0])]; tensor expand_dims_3 = const()[name = string("expand_dims_3"), val = tensor([0])]; tensor expand_dims_4 = const()[name = string("expand_dims_4"), val = tensor([1])]; int32 concat_2_axis_0 = const()[name = string("concat_2_axis_0"), val = int32(0)]; bool concat_2_interleave_0 = const()[name = string("concat_2_interleave_0"), val = bool(false)]; tensor concat_2 = concat(axis = concat_2_axis_0, interleave = concat_2_interleave_0, values = (expand_dims_0, expand_dims_1, current_pos, expand_dims_3))[name = string("concat_2")]; tensor concat_3_values1_0 = const()[name = string("concat_3_values1_0"), val = tensor([0])]; tensor concat_3_values3_0 = const()[name = string("concat_3_values3_0"), val = tensor([0])]; int32 concat_3_axis_0 = const()[name = string("concat_3_axis_0"), val = int32(0)]; bool concat_3_interleave_0 = const()[name = string("concat_3_interleave_0"), val = bool(false)]; tensor concat_3 = concat(axis = concat_3_axis_0, interleave = concat_3_interleave_0, values = (expand_dims_4, concat_3_values1_0, var_1760, concat_3_values3_0))[name = string("concat_3")]; tensor model_model_kv_cache_0_internal_tensor_assign_1_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_1_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_1_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_1_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_2, begin_mask = model_model_kv_cache_0_internal_tensor_assign_1_begin_mask_0, end = concat_3, end_mask = model_model_kv_cache_0_internal_tensor_assign_1_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_1_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_1_stride_0, update = key_states_3, x = read_state_0)[name = string("model_model_kv_cache_0_internal_tensor_assign_1_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_1_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_56_write_state")]; tensor coreml_update_state_56 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_56")]; tensor expand_dims_6 = const()[name = string("expand_dims_6"), val = tensor([28])]; tensor expand_dims_7 = const()[name = string("expand_dims_7"), val = tensor([0])]; tensor expand_dims_9 = const()[name = string("expand_dims_9"), val = tensor([0])]; tensor expand_dims_10 = const()[name = string("expand_dims_10"), val = tensor([29])]; int32 concat_6_axis_0 = const()[name = string("concat_6_axis_0"), val = int32(0)]; bool concat_6_interleave_0 = const()[name = string("concat_6_interleave_0"), val = bool(false)]; tensor concat_6 = concat(axis = concat_6_axis_0, interleave = concat_6_interleave_0, values = (expand_dims_6, expand_dims_7, current_pos, expand_dims_9))[name = string("concat_6")]; tensor concat_7_values1_0 = const()[name = string("concat_7_values1_0"), val = tensor([0])]; tensor concat_7_values3_0 = const()[name = string("concat_7_values3_0"), val = tensor([0])]; int32 concat_7_axis_0 = const()[name = string("concat_7_axis_0"), val = int32(0)]; bool concat_7_interleave_0 = const()[name = string("concat_7_interleave_0"), val = bool(false)]; tensor concat_7 = concat(axis = concat_7_axis_0, interleave = concat_7_interleave_0, values = (expand_dims_10, concat_7_values1_0, var_1760, concat_7_values3_0))[name = string("concat_7")]; tensor model_model_kv_cache_0_internal_tensor_assign_2_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_2_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_2_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_2_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_2_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_2_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_2_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_2_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_3 = transpose(perm = var_1635, x = var_1630)[name = string("transpose_248")]; tensor model_model_kv_cache_0_internal_tensor_assign_2_cast_fp16 = slice_update(begin = concat_6, begin_mask = model_model_kv_cache_0_internal_tensor_assign_2_begin_mask_0, end = concat_7, end_mask = model_model_kv_cache_0_internal_tensor_assign_2_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_2_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_2_stride_0, update = value_states_3, x = coreml_update_state_56)[name = string("model_model_kv_cache_0_internal_tensor_assign_2_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_2_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_57_write_state")]; tensor coreml_update_state_57 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_57")]; tensor var_1809_begin_0 = const()[name = string("op_1809_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_1809_end_0 = const()[name = string("op_1809_end_0"), val = tensor([1, 8, 1024, 128])]; tensor var_1809_end_mask_0 = const()[name = string("op_1809_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_1809_cast_fp16 = slice_by_index(begin = var_1809_begin_0, end = var_1809_end_0, end_mask = var_1809_end_mask_0, x = coreml_update_state_57)[name = string("op_1809_cast_fp16")]; tensor K_layer_cache_1_axes_0 = const()[name = string("K_layer_cache_1_axes_0"), val = tensor([0])]; tensor K_layer_cache_1_cast_fp16 = squeeze(axes = K_layer_cache_1_axes_0, x = var_1809_cast_fp16)[name = string("K_layer_cache_1_cast_fp16")]; tensor var_1816_begin_0 = const()[name = string("op_1816_begin_0"), val = tensor([28, 0, 0, 0])]; tensor var_1816_end_0 = const()[name = string("op_1816_end_0"), val = tensor([29, 8, 1024, 128])]; tensor var_1816_end_mask_0 = const()[name = string("op_1816_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_1816_cast_fp16 = slice_by_index(begin = var_1816_begin_0, end = var_1816_end_0, end_mask = var_1816_end_mask_0, x = coreml_update_state_57)[name = string("op_1816_cast_fp16")]; tensor V_layer_cache_1_axes_0 = const()[name = string("V_layer_cache_1_axes_0"), val = tensor([0])]; tensor V_layer_cache_1_cast_fp16 = squeeze(axes = V_layer_cache_1_axes_0, x = var_1816_cast_fp16)[name = string("V_layer_cache_1_cast_fp16")]; tensor x_7_axes_0 = const()[name = string("x_7_axes_0"), val = tensor([1])]; tensor x_7_cast_fp16 = expand_dims(axes = x_7_axes_0, x = K_layer_cache_1_cast_fp16)[name = string("x_7_cast_fp16")]; tensor var_1845 = const()[name = string("op_1845"), val = tensor([1, 2, 1, 1])]; tensor x_9_cast_fp16 = tile(reps = var_1845, x = x_7_cast_fp16)[name = string("x_9_cast_fp16")]; tensor var_1857 = const()[name = string("op_1857"), val = tensor([1, -1, 1024, 128])]; tensor key_states_7_cast_fp16 = reshape(shape = var_1857, x = x_9_cast_fp16)[name = string("key_states_7_cast_fp16")]; tensor x_13_axes_0 = const()[name = string("x_13_axes_0"), val = tensor([1])]; tensor x_13_cast_fp16 = expand_dims(axes = x_13_axes_0, x = V_layer_cache_1_cast_fp16)[name = string("x_13_cast_fp16")]; tensor var_1865 = const()[name = string("op_1865"), val = tensor([1, 2, 1, 1])]; tensor x_15_cast_fp16 = tile(reps = var_1865, x = x_13_cast_fp16)[name = string("x_15_cast_fp16")]; bool var_1892_transpose_x_0 = const()[name = string("op_1892_transpose_x_0"), val = bool(false)]; bool var_1892_transpose_y_0 = const()[name = string("op_1892_transpose_y_0"), val = bool(true)]; tensor var_1892 = matmul(transpose_x = var_1892_transpose_x_0, transpose_y = var_1892_transpose_y_0, x = query_states_3, y = key_states_7_cast_fp16)[name = string("op_1892")]; fp16 var_1893_to_fp16 = const()[name = string("op_1893_to_fp16"), val = fp16(0x1.6ap-4)]; tensor attn_weights_1_cast_fp16 = mul(x = var_1892, y = var_1893_to_fp16)[name = string("attn_weights_1_cast_fp16")]; tensor attn_weights_3_cast_fp16 = add(x = attn_weights_1_cast_fp16, y = causal_mask)[name = string("attn_weights_3_cast_fp16")]; int32 var_1928 = const()[name = string("op_1928"), val = int32(-1)]; tensor var_1930_cast_fp16 = softmax(axis = var_1928, x = attn_weights_3_cast_fp16)[name = string("op_1930_cast_fp16")]; tensor concat_12 = const()[name = string("concat_12"), val = tensor([16, 64, 1024])]; tensor reshape_0_cast_fp16 = reshape(shape = concat_12, x = var_1930_cast_fp16)[name = string("reshape_0_cast_fp16")]; tensor concat_13 = const()[name = string("concat_13"), val = tensor([16, 1024, 128])]; tensor reshape_1_cast_fp16 = reshape(shape = concat_13, x = x_15_cast_fp16)[name = string("reshape_1_cast_fp16")]; bool matmul_0_transpose_x_0 = const()[name = string("matmul_0_transpose_x_0"), val = bool(false)]; bool matmul_0_transpose_y_0 = const()[name = string("matmul_0_transpose_y_0"), val = bool(false)]; tensor matmul_0_cast_fp16 = matmul(transpose_x = matmul_0_transpose_x_0, transpose_y = matmul_0_transpose_y_0, x = reshape_0_cast_fp16, y = reshape_1_cast_fp16)[name = string("matmul_0_cast_fp16")]; tensor concat_17 = const()[name = string("concat_17"), val = tensor([1, 16, 64, 128])]; tensor reshape_2_cast_fp16 = reshape(shape = concat_17, x = matmul_0_cast_fp16)[name = string("reshape_2_cast_fp16")]; tensor var_1942_perm_0 = const()[name = string("op_1942_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_1961 = const()[name = string("op_1961"), val = tensor([1, 64, 2048])]; tensor var_1942_cast_fp16 = transpose(perm = var_1942_perm_0, x = reshape_2_cast_fp16)[name = string("transpose_247")]; tensor attn_output_5_cast_fp16 = reshape(shape = var_1961, x = var_1942_cast_fp16)[name = string("attn_output_5_cast_fp16")]; tensor var_1966 = const()[name = string("op_1966"), val = tensor([0, 2, 1])]; string var_1982_pad_type_0 = const()[name = string("op_1982_pad_type_0"), val = string("valid")]; int32 var_1982_groups_0 = const()[name = string("op_1982_groups_0"), val = int32(1)]; tensor var_1982_strides_0 = const()[name = string("op_1982_strides_0"), val = tensor([1])]; tensor var_1982_pad_0 = const()[name = string("op_1982_pad_0"), val = tensor([0, 0])]; tensor var_1982_dilations_0 = const()[name = string("op_1982_dilations_0"), val = tensor([1])]; tensor squeeze_0_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1010722688))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1013868480))))[name = string("squeeze_0_cast_fp16_to_fp32_to_fp16_palettized")]; tensor var_1967_cast_fp16 = transpose(perm = var_1966, x = attn_output_5_cast_fp16)[name = string("transpose_246")]; tensor var_1982_cast_fp16 = conv(dilations = var_1982_dilations_0, groups = var_1982_groups_0, pad = var_1982_pad_0, pad_type = var_1982_pad_type_0, strides = var_1982_strides_0, weight = squeeze_0_cast_fp16_to_fp32_to_fp16_palettized, x = var_1967_cast_fp16)[name = string("op_1982_cast_fp16")]; tensor var_1986 = const()[name = string("op_1986"), val = tensor([0, 2, 1])]; tensor attn_output_9_cast_fp16 = transpose(perm = var_1986, x = var_1982_cast_fp16)[name = string("transpose_245")]; tensor hidden_states_5_cast_fp16 = add(x = hidden_states, y = attn_output_9_cast_fp16)[name = string("hidden_states_5_cast_fp16")]; tensor mean_7_axes_0 = const()[name = string("mean_7_axes_0"), val = tensor([-1])]; bool mean_7_keep_dims_0 = const()[name = string("mean_7_keep_dims_0"), val = bool(true)]; tensor mean_7_cast_fp16 = reduce_mean(axes = mean_7_axes_0, keep_dims = mean_7_keep_dims_0, x = hidden_states_5_cast_fp16)[name = string("mean_7_cast_fp16")]; tensor input_11_cast_fp16 = sub(x = hidden_states_5_cast_fp16, y = mean_7_cast_fp16)[name = string("input_11_cast_fp16")]; tensor var_2005_axes_0 = const()[name = string("op_2005_axes_0"), val = tensor([-1])]; tensor model_model_layers_0_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_0_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1013901312)))]; fp16 var_1993_to_fp16 = const()[name = string("op_1993_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_2005_cast_fp16 = layer_norm(axes = var_2005_axes_0, epsilon = var_1993_to_fp16, gamma = model_model_layers_0_post_attention_layernorm_weight_to_fp16, x = input_11_cast_fp16)[name = string("op_2005_cast_fp16")]; tensor var_2019 = const()[name = string("op_2019"), val = tensor([0, 2, 1])]; tensor input_13_axes_0 = const()[name = string("input_13_axes_0"), val = tensor([2])]; tensor var_2020 = transpose(perm = var_2019, x = var_2005_cast_fp16)[name = string("transpose_244")]; tensor input_13 = expand_dims(axes = input_13_axes_0, x = var_2020)[name = string("input_13")]; string input_15_pad_type_0 = const()[name = string("input_15_pad_type_0"), val = string("valid")]; tensor input_15_strides_0 = const()[name = string("input_15_strides_0"), val = tensor([1, 1])]; tensor input_15_pad_0 = const()[name = string("input_15_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_15_dilations_0 = const()[name = string("input_15_dilations_0"), val = tensor([1, 1])]; int32 input_15_groups_0 = const()[name = string("input_15_groups_0"), val = int32(1)]; tensor input_15 = conv(dilations = input_15_dilations_0, groups = input_15_groups_0, pad = input_15_pad_0, pad_type = input_15_pad_type_0, strides = input_15_strides_0, weight = model_model_layers_0_mlp_gate_proj_weight_palettized, x = input_13)[name = string("input_15")]; string b_1_pad_type_0 = const()[name = string("b_1_pad_type_0"), val = string("valid")]; tensor b_1_strides_0 = const()[name = string("b_1_strides_0"), val = tensor([1, 1])]; tensor b_1_pad_0 = const()[name = string("b_1_pad_0"), val = tensor([0, 0, 0, 0])]; tensor b_1_dilations_0 = const()[name = string("b_1_dilations_0"), val = tensor([1, 1])]; int32 b_1_groups_0 = const()[name = string("b_1_groups_0"), val = int32(1)]; tensor b_1 = conv(dilations = b_1_dilations_0, groups = b_1_groups_0, pad = b_1_pad_0, pad_type = b_1_pad_type_0, strides = b_1_strides_0, weight = model_model_layers_0_mlp_up_proj_weight_palettized, x = input_13)[name = string("b_1")]; tensor c_1 = silu(x = input_15)[name = string("c_1")]; tensor input_17 = mul(x = c_1, y = b_1)[name = string("input_17")]; string e_1_pad_type_0 = const()[name = string("e_1_pad_type_0"), val = string("valid")]; tensor e_1_strides_0 = const()[name = string("e_1_strides_0"), val = tensor([1, 1])]; tensor e_1_pad_0 = const()[name = string("e_1_pad_0"), val = tensor([0, 0, 0, 0])]; tensor e_1_dilations_0 = const()[name = string("e_1_dilations_0"), val = tensor([1, 1])]; int32 e_1_groups_0 = const()[name = string("e_1_groups_0"), val = int32(1)]; tensor e_1 = conv(dilations = e_1_dilations_0, groups = e_1_groups_0, pad = e_1_pad_0, pad_type = e_1_pad_type_0, strides = e_1_strides_0, weight = model_model_layers_0_mlp_down_proj_weight_palettized, x = input_17)[name = string("e_1")]; tensor var_2042_axes_0 = const()[name = string("op_2042_axes_0"), val = tensor([2])]; tensor var_2042 = squeeze(axes = var_2042_axes_0, x = e_1)[name = string("op_2042")]; tensor var_2043 = const()[name = string("op_2043"), val = tensor([0, 2, 1])]; tensor var_2044 = transpose(perm = var_2043, x = var_2042)[name = string("transpose_243")]; tensor hidden_states_7_cast_fp16 = add(x = hidden_states_5_cast_fp16, y = var_2044)[name = string("hidden_states_7_cast_fp16")]; tensor mean_9_axes_0 = const()[name = string("mean_9_axes_0"), val = tensor([-1])]; bool mean_9_keep_dims_0 = const()[name = string("mean_9_keep_dims_0"), val = bool(true)]; tensor mean_9_cast_fp16 = reduce_mean(axes = mean_9_axes_0, keep_dims = mean_9_keep_dims_0, x = hidden_states_7_cast_fp16)[name = string("mean_9_cast_fp16")]; tensor input_19_cast_fp16 = sub(x = hidden_states_7_cast_fp16, y = mean_9_cast_fp16)[name = string("input_19_cast_fp16")]; tensor var_2062_axes_0 = const()[name = string("op_2062_axes_0"), val = tensor([-1])]; tensor model_model_layers_1_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_1_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1013905472)))]; fp16 var_2050_to_fp16 = const()[name = string("op_2050_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_2062_cast_fp16 = layer_norm(axes = var_2062_axes_0, epsilon = var_2050_to_fp16, gamma = model_model_layers_1_input_layernorm_weight_to_fp16, x = input_19_cast_fp16)[name = string("op_2062_cast_fp16")]; tensor var_2074 = const()[name = string("op_2074"), val = tensor([0, 2, 1])]; tensor var_2077_axes_0 = const()[name = string("op_2077_axes_0"), val = tensor([2])]; tensor var_2075 = transpose(perm = var_2074, x = var_2062_cast_fp16)[name = string("transpose_242")]; tensor var_2077 = expand_dims(axes = var_2077_axes_0, x = var_2075)[name = string("op_2077")]; string query_states_9_pad_type_0 = const()[name = string("query_states_9_pad_type_0"), val = string("valid")]; tensor query_states_9_strides_0 = const()[name = string("query_states_9_strides_0"), val = tensor([1, 1])]; tensor query_states_9_pad_0 = const()[name = string("query_states_9_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_9_dilations_0 = const()[name = string("query_states_9_dilations_0"), val = tensor([1, 1])]; int32 query_states_9_groups_0 = const()[name = string("query_states_9_groups_0"), val = int32(1)]; tensor query_states_9 = conv(dilations = query_states_9_dilations_0, groups = query_states_9_groups_0, pad = query_states_9_pad_0, pad_type = query_states_9_pad_type_0, strides = query_states_9_strides_0, weight = model_model_layers_1_self_attn_q_proj_weight_palettized, x = var_2077)[name = string("query_states_9")]; string key_states_11_pad_type_0 = const()[name = string("key_states_11_pad_type_0"), val = string("valid")]; tensor key_states_11_strides_0 = const()[name = string("key_states_11_strides_0"), val = tensor([1, 1])]; tensor key_states_11_pad_0 = const()[name = string("key_states_11_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_11_dilations_0 = const()[name = string("key_states_11_dilations_0"), val = tensor([1, 1])]; int32 key_states_11_groups_0 = const()[name = string("key_states_11_groups_0"), val = int32(1)]; tensor key_states_11 = conv(dilations = key_states_11_dilations_0, groups = key_states_11_groups_0, pad = key_states_11_pad_0, pad_type = key_states_11_pad_type_0, strides = key_states_11_strides_0, weight = model_model_layers_1_self_attn_k_proj_weight_palettized, x = var_2077)[name = string("key_states_11")]; string value_states_9_pad_type_0 = const()[name = string("value_states_9_pad_type_0"), val = string("valid")]; tensor value_states_9_strides_0 = const()[name = string("value_states_9_strides_0"), val = tensor([1, 1])]; tensor value_states_9_pad_0 = const()[name = string("value_states_9_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_9_dilations_0 = const()[name = string("value_states_9_dilations_0"), val = tensor([1, 1])]; int32 value_states_9_groups_0 = const()[name = string("value_states_9_groups_0"), val = int32(1)]; tensor value_states_9 = conv(dilations = value_states_9_dilations_0, groups = value_states_9_groups_0, pad = value_states_9_pad_0, pad_type = value_states_9_pad_type_0, strides = value_states_9_strides_0, weight = model_model_layers_1_self_attn_v_proj_weight_palettized, x = var_2077)[name = string("value_states_9")]; tensor var_2119 = const()[name = string("op_2119"), val = tensor([1, 16, 128, 64])]; tensor var_2120 = reshape(shape = var_2119, x = query_states_9)[name = string("op_2120")]; tensor var_2125 = const()[name = string("op_2125"), val = tensor([0, 1, 3, 2])]; tensor var_2130 = const()[name = string("op_2130"), val = tensor([1, 8, 128, 64])]; tensor var_2131 = reshape(shape = var_2130, x = key_states_11)[name = string("op_2131")]; tensor var_2136 = const()[name = string("op_2136"), val = tensor([0, 1, 3, 2])]; tensor var_2141 = const()[name = string("op_2141"), val = tensor([1, 8, 128, 64])]; tensor var_2142 = reshape(shape = var_2141, x = value_states_9)[name = string("op_2142")]; tensor var_2147 = const()[name = string("op_2147"), val = tensor([0, 1, 3, 2])]; tensor mean_11_axes_0 = const()[name = string("mean_11_axes_0"), val = tensor([-1])]; bool mean_11_keep_dims_0 = const()[name = string("mean_11_keep_dims_0"), val = bool(true)]; tensor x_21 = transpose(perm = var_2125, x = var_2120)[name = string("transpose_241")]; tensor mean_11 = reduce_mean(axes = mean_11_axes_0, keep_dims = mean_11_keep_dims_0, x = x_21)[name = string("mean_11")]; tensor input_23 = sub(x = x_21, y = mean_11)[name = string("input_23")]; tensor var_2164_axes_0 = const()[name = string("op_2164_axes_0"), val = tensor([-1])]; tensor model_model_layers_1_self_attn_q_norm_weight_to_fp16 = const()[name = string("model_model_layers_1_self_attn_q_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1013909632)))]; fp16 var_2152_to_fp16 = const()[name = string("op_2152_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_2164_cast_fp16 = layer_norm(axes = var_2164_axes_0, epsilon = var_2152_to_fp16, gamma = model_model_layers_1_self_attn_q_norm_weight_to_fp16, x = input_23)[name = string("op_2164_cast_fp16")]; tensor mean_13_axes_0 = const()[name = string("mean_13_axes_0"), val = tensor([-1])]; bool mean_13_keep_dims_0 = const()[name = string("mean_13_keep_dims_0"), val = bool(true)]; tensor x_23 = transpose(perm = var_2136, x = var_2131)[name = string("transpose_240")]; tensor mean_13 = reduce_mean(axes = mean_13_axes_0, keep_dims = mean_13_keep_dims_0, x = x_23)[name = string("mean_13")]; tensor input_25 = sub(x = x_23, y = mean_13)[name = string("input_25")]; tensor var_2182_axes_0 = const()[name = string("op_2182_axes_0"), val = tensor([-1])]; tensor model_model_layers_1_self_attn_k_norm_weight_to_fp16 = const()[name = string("model_model_layers_1_self_attn_k_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1013909952)))]; fp16 var_2170_to_fp16 = const()[name = string("op_2170_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_2182_cast_fp16 = layer_norm(axes = var_2182_axes_0, epsilon = var_2170_to_fp16, gamma = model_model_layers_1_self_attn_k_norm_weight_to_fp16, x = input_25)[name = string("op_2182_cast_fp16")]; tensor var_2197 = mul(x = var_2164_cast_fp16, y = cos_5)[name = string("op_2197")]; tensor x1_5_begin_0 = const()[name = string("x1_5_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_5_end_0 = const()[name = string("x1_5_end_0"), val = tensor([1, 16, 64, 64])]; tensor x1_5_end_mask_0 = const()[name = string("x1_5_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_5 = slice_by_index(begin = x1_5_begin_0, end = x1_5_end_0, end_mask = x1_5_end_mask_0, x = var_2164_cast_fp16)[name = string("x1_5")]; tensor x2_5_begin_0 = const()[name = string("x2_5_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_5_end_0 = const()[name = string("x2_5_end_0"), val = tensor([1, 16, 64, 128])]; tensor x2_5_end_mask_0 = const()[name = string("x2_5_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_5 = slice_by_index(begin = x2_5_begin_0, end = x2_5_end_0, end_mask = x2_5_end_mask_0, x = var_2164_cast_fp16)[name = string("x2_5")]; fp16 const_29_promoted = const()[name = string("const_29_promoted"), val = fp16(-0x1p+0)]; tensor var_2218 = mul(x = x2_5, y = const_29_promoted)[name = string("op_2218")]; int32 var_2220 = const()[name = string("op_2220"), val = int32(-1)]; bool var_2221_interleave_0 = const()[name = string("op_2221_interleave_0"), val = bool(false)]; tensor var_2221 = concat(axis = var_2220, interleave = var_2221_interleave_0, values = (var_2218, x1_5))[name = string("op_2221")]; tensor var_2222 = mul(x = var_2221, y = sin_5)[name = string("op_2222")]; tensor query_states_11 = add(x = var_2197, y = var_2222)[name = string("query_states_11")]; tensor var_2225 = mul(x = var_2182_cast_fp16, y = cos_5)[name = string("op_2225")]; tensor x1_7_begin_0 = const()[name = string("x1_7_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_7_end_0 = const()[name = string("x1_7_end_0"), val = tensor([1, 8, 64, 64])]; tensor x1_7_end_mask_0 = const()[name = string("x1_7_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_7 = slice_by_index(begin = x1_7_begin_0, end = x1_7_end_0, end_mask = x1_7_end_mask_0, x = var_2182_cast_fp16)[name = string("x1_7")]; tensor x2_7_begin_0 = const()[name = string("x2_7_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_7_end_0 = const()[name = string("x2_7_end_0"), val = tensor([1, 8, 64, 128])]; tensor x2_7_end_mask_0 = const()[name = string("x2_7_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_7 = slice_by_index(begin = x2_7_begin_0, end = x2_7_end_0, end_mask = x2_7_end_mask_0, x = var_2182_cast_fp16)[name = string("x2_7")]; fp16 const_32_promoted = const()[name = string("const_32_promoted"), val = fp16(-0x1p+0)]; tensor var_2246 = mul(x = x2_7, y = const_32_promoted)[name = string("op_2246")]; int32 var_2248 = const()[name = string("op_2248"), val = int32(-1)]; bool var_2249_interleave_0 = const()[name = string("op_2249_interleave_0"), val = bool(false)]; tensor var_2249 = concat(axis = var_2248, interleave = var_2249_interleave_0, values = (var_2246, x1_7))[name = string("op_2249")]; tensor var_2250 = mul(x = var_2249, y = sin_5)[name = string("op_2250")]; tensor key_states_13 = add(x = var_2225, y = var_2250)[name = string("key_states_13")]; tensor expand_dims_12 = const()[name = string("expand_dims_12"), val = tensor([1])]; tensor expand_dims_13 = const()[name = string("expand_dims_13"), val = tensor([0])]; tensor expand_dims_15 = const()[name = string("expand_dims_15"), val = tensor([0])]; tensor expand_dims_16 = const()[name = string("expand_dims_16"), val = tensor([2])]; int32 concat_20_axis_0 = const()[name = string("concat_20_axis_0"), val = int32(0)]; bool concat_20_interleave_0 = const()[name = string("concat_20_interleave_0"), val = bool(false)]; tensor concat_20 = concat(axis = concat_20_axis_0, interleave = concat_20_interleave_0, values = (expand_dims_12, expand_dims_13, current_pos, expand_dims_15))[name = string("concat_20")]; tensor concat_21_values1_0 = const()[name = string("concat_21_values1_0"), val = tensor([0])]; tensor concat_21_values3_0 = const()[name = string("concat_21_values3_0"), val = tensor([0])]; int32 concat_21_axis_0 = const()[name = string("concat_21_axis_0"), val = int32(0)]; bool concat_21_interleave_0 = const()[name = string("concat_21_interleave_0"), val = bool(false)]; tensor concat_21 = concat(axis = concat_21_axis_0, interleave = concat_21_interleave_0, values = (expand_dims_16, concat_21_values1_0, var_1760, concat_21_values3_0))[name = string("concat_21")]; tensor model_model_kv_cache_0_internal_tensor_assign_3_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_3_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_3_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_3_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_3_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_3_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_3_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_3_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_3_cast_fp16 = slice_update(begin = concat_20, begin_mask = model_model_kv_cache_0_internal_tensor_assign_3_begin_mask_0, end = concat_21, end_mask = model_model_kv_cache_0_internal_tensor_assign_3_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_3_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_3_stride_0, update = key_states_13, x = coreml_update_state_57)[name = string("model_model_kv_cache_0_internal_tensor_assign_3_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_3_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_58_write_state")]; tensor coreml_update_state_58 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_58")]; tensor expand_dims_18 = const()[name = string("expand_dims_18"), val = tensor([29])]; tensor expand_dims_19 = const()[name = string("expand_dims_19"), val = tensor([0])]; tensor expand_dims_21 = const()[name = string("expand_dims_21"), val = tensor([0])]; tensor expand_dims_22 = const()[name = string("expand_dims_22"), val = tensor([30])]; int32 concat_24_axis_0 = const()[name = string("concat_24_axis_0"), val = int32(0)]; bool concat_24_interleave_0 = const()[name = string("concat_24_interleave_0"), val = bool(false)]; tensor concat_24 = concat(axis = concat_24_axis_0, interleave = concat_24_interleave_0, values = (expand_dims_18, expand_dims_19, current_pos, expand_dims_21))[name = string("concat_24")]; tensor concat_25_values1_0 = const()[name = string("concat_25_values1_0"), val = tensor([0])]; tensor concat_25_values3_0 = const()[name = string("concat_25_values3_0"), val = tensor([0])]; int32 concat_25_axis_0 = const()[name = string("concat_25_axis_0"), val = int32(0)]; bool concat_25_interleave_0 = const()[name = string("concat_25_interleave_0"), val = bool(false)]; tensor concat_25 = concat(axis = concat_25_axis_0, interleave = concat_25_interleave_0, values = (expand_dims_22, concat_25_values1_0, var_1760, concat_25_values3_0))[name = string("concat_25")]; tensor model_model_kv_cache_0_internal_tensor_assign_4_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_4_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_4_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_4_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_4_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_4_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_4_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_4_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_11 = transpose(perm = var_2147, x = var_2142)[name = string("transpose_239")]; tensor model_model_kv_cache_0_internal_tensor_assign_4_cast_fp16 = slice_update(begin = concat_24, begin_mask = model_model_kv_cache_0_internal_tensor_assign_4_begin_mask_0, end = concat_25, end_mask = model_model_kv_cache_0_internal_tensor_assign_4_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_4_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_4_stride_0, update = value_states_11, x = coreml_update_state_58)[name = string("model_model_kv_cache_0_internal_tensor_assign_4_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_4_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_59_write_state")]; tensor coreml_update_state_59 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_59")]; tensor var_2321_begin_0 = const()[name = string("op_2321_begin_0"), val = tensor([1, 0, 0, 0])]; tensor var_2321_end_0 = const()[name = string("op_2321_end_0"), val = tensor([2, 8, 1024, 128])]; tensor var_2321_end_mask_0 = const()[name = string("op_2321_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_2321_cast_fp16 = slice_by_index(begin = var_2321_begin_0, end = var_2321_end_0, end_mask = var_2321_end_mask_0, x = coreml_update_state_59)[name = string("op_2321_cast_fp16")]; tensor K_layer_cache_3_axes_0 = const()[name = string("K_layer_cache_3_axes_0"), val = tensor([0])]; tensor K_layer_cache_3_cast_fp16 = squeeze(axes = K_layer_cache_3_axes_0, x = var_2321_cast_fp16)[name = string("K_layer_cache_3_cast_fp16")]; tensor var_2328_begin_0 = const()[name = string("op_2328_begin_0"), val = tensor([29, 0, 0, 0])]; tensor var_2328_end_0 = const()[name = string("op_2328_end_0"), val = tensor([30, 8, 1024, 128])]; tensor var_2328_end_mask_0 = const()[name = string("op_2328_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_2328_cast_fp16 = slice_by_index(begin = var_2328_begin_0, end = var_2328_end_0, end_mask = var_2328_end_mask_0, x = coreml_update_state_59)[name = string("op_2328_cast_fp16")]; tensor V_layer_cache_3_axes_0 = const()[name = string("V_layer_cache_3_axes_0"), val = tensor([0])]; tensor V_layer_cache_3_cast_fp16 = squeeze(axes = V_layer_cache_3_axes_0, x = var_2328_cast_fp16)[name = string("V_layer_cache_3_cast_fp16")]; tensor x_27_axes_0 = const()[name = string("x_27_axes_0"), val = tensor([1])]; tensor x_27_cast_fp16 = expand_dims(axes = x_27_axes_0, x = K_layer_cache_3_cast_fp16)[name = string("x_27_cast_fp16")]; tensor var_2357 = const()[name = string("op_2357"), val = tensor([1, 2, 1, 1])]; tensor x_29_cast_fp16 = tile(reps = var_2357, x = x_27_cast_fp16)[name = string("x_29_cast_fp16")]; tensor var_2369 = const()[name = string("op_2369"), val = tensor([1, -1, 1024, 128])]; tensor key_states_17_cast_fp16 = reshape(shape = var_2369, x = x_29_cast_fp16)[name = string("key_states_17_cast_fp16")]; tensor x_33_axes_0 = const()[name = string("x_33_axes_0"), val = tensor([1])]; tensor x_33_cast_fp16 = expand_dims(axes = x_33_axes_0, x = V_layer_cache_3_cast_fp16)[name = string("x_33_cast_fp16")]; tensor var_2377 = const()[name = string("op_2377"), val = tensor([1, 2, 1, 1])]; tensor x_35_cast_fp16 = tile(reps = var_2377, x = x_33_cast_fp16)[name = string("x_35_cast_fp16")]; bool var_2404_transpose_x_0 = const()[name = string("op_2404_transpose_x_0"), val = bool(false)]; bool var_2404_transpose_y_0 = const()[name = string("op_2404_transpose_y_0"), val = bool(true)]; tensor var_2404 = matmul(transpose_x = var_2404_transpose_x_0, transpose_y = var_2404_transpose_y_0, x = query_states_11, y = key_states_17_cast_fp16)[name = string("op_2404")]; fp16 var_2405_to_fp16 = const()[name = string("op_2405_to_fp16"), val = fp16(0x1.6ap-4)]; tensor attn_weights_5_cast_fp16 = mul(x = var_2404, y = var_2405_to_fp16)[name = string("attn_weights_5_cast_fp16")]; tensor attn_weights_7_cast_fp16 = add(x = attn_weights_5_cast_fp16, y = causal_mask)[name = string("attn_weights_7_cast_fp16")]; int32 var_2440 = const()[name = string("op_2440"), val = int32(-1)]; tensor var_2442_cast_fp16 = softmax(axis = var_2440, x = attn_weights_7_cast_fp16)[name = string("op_2442_cast_fp16")]; tensor concat_30 = const()[name = string("concat_30"), val = tensor([16, 64, 1024])]; tensor reshape_3_cast_fp16 = reshape(shape = concat_30, x = var_2442_cast_fp16)[name = string("reshape_3_cast_fp16")]; tensor concat_31 = const()[name = string("concat_31"), val = tensor([16, 1024, 128])]; tensor reshape_4_cast_fp16 = reshape(shape = concat_31, x = x_35_cast_fp16)[name = string("reshape_4_cast_fp16")]; bool matmul_1_transpose_x_0 = const()[name = string("matmul_1_transpose_x_0"), val = bool(false)]; bool matmul_1_transpose_y_0 = const()[name = string("matmul_1_transpose_y_0"), val = bool(false)]; tensor matmul_1_cast_fp16 = matmul(transpose_x = matmul_1_transpose_x_0, transpose_y = matmul_1_transpose_y_0, x = reshape_3_cast_fp16, y = reshape_4_cast_fp16)[name = string("matmul_1_cast_fp16")]; tensor concat_35 = const()[name = string("concat_35"), val = tensor([1, 16, 64, 128])]; tensor reshape_5_cast_fp16 = reshape(shape = concat_35, x = matmul_1_cast_fp16)[name = string("reshape_5_cast_fp16")]; tensor var_2454_perm_0 = const()[name = string("op_2454_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_2473 = const()[name = string("op_2473"), val = tensor([1, 64, 2048])]; tensor var_2454_cast_fp16 = transpose(perm = var_2454_perm_0, x = reshape_5_cast_fp16)[name = string("transpose_238")]; tensor attn_output_15_cast_fp16 = reshape(shape = var_2473, x = var_2454_cast_fp16)[name = string("attn_output_15_cast_fp16")]; tensor var_2478 = const()[name = string("op_2478"), val = tensor([0, 2, 1])]; string var_2494_pad_type_0 = const()[name = string("op_2494_pad_type_0"), val = string("valid")]; int32 var_2494_groups_0 = const()[name = string("op_2494_groups_0"), val = int32(1)]; tensor var_2494_strides_0 = const()[name = string("op_2494_strides_0"), val = tensor([1])]; tensor var_2494_pad_0 = const()[name = string("op_2494_pad_0"), val = tensor([0, 0])]; tensor var_2494_dilations_0 = const()[name = string("op_2494_dilations_0"), val = tensor([1])]; tensor squeeze_1_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1013910272))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1017056064))))[name = string("squeeze_1_cast_fp16_to_fp32_to_fp16_palettized")]; tensor var_2479_cast_fp16 = transpose(perm = var_2478, x = attn_output_15_cast_fp16)[name = string("transpose_237")]; tensor var_2494_cast_fp16 = conv(dilations = var_2494_dilations_0, groups = var_2494_groups_0, pad = var_2494_pad_0, pad_type = var_2494_pad_type_0, strides = var_2494_strides_0, weight = squeeze_1_cast_fp16_to_fp32_to_fp16_palettized, x = var_2479_cast_fp16)[name = string("op_2494_cast_fp16")]; tensor var_2498 = const()[name = string("op_2498"), val = tensor([0, 2, 1])]; tensor attn_output_19_cast_fp16 = transpose(perm = var_2498, x = var_2494_cast_fp16)[name = string("transpose_236")]; tensor hidden_states_11_cast_fp16 = add(x = hidden_states_7_cast_fp16, y = attn_output_19_cast_fp16)[name = string("hidden_states_11_cast_fp16")]; tensor mean_15_axes_0 = const()[name = string("mean_15_axes_0"), val = tensor([-1])]; bool mean_15_keep_dims_0 = const()[name = string("mean_15_keep_dims_0"), val = bool(true)]; tensor mean_15_cast_fp16 = reduce_mean(axes = mean_15_axes_0, keep_dims = mean_15_keep_dims_0, x = hidden_states_11_cast_fp16)[name = string("mean_15_cast_fp16")]; tensor input_29_cast_fp16 = sub(x = hidden_states_11_cast_fp16, y = mean_15_cast_fp16)[name = string("input_29_cast_fp16")]; tensor var_2517_axes_0 = const()[name = string("op_2517_axes_0"), val = tensor([-1])]; tensor model_model_layers_1_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_1_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1017088896)))]; fp16 var_2505_to_fp16 = const()[name = string("op_2505_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_2517_cast_fp16 = layer_norm(axes = var_2517_axes_0, epsilon = var_2505_to_fp16, gamma = model_model_layers_1_post_attention_layernorm_weight_to_fp16, x = input_29_cast_fp16)[name = string("op_2517_cast_fp16")]; tensor var_2531 = const()[name = string("op_2531"), val = tensor([0, 2, 1])]; tensor input_31_axes_0 = const()[name = string("input_31_axes_0"), val = tensor([2])]; tensor var_2532 = transpose(perm = var_2531, x = var_2517_cast_fp16)[name = string("transpose_235")]; tensor input_31 = expand_dims(axes = input_31_axes_0, x = var_2532)[name = string("input_31")]; string input_33_pad_type_0 = const()[name = string("input_33_pad_type_0"), val = string("valid")]; tensor input_33_strides_0 = const()[name = string("input_33_strides_0"), val = tensor([1, 1])]; tensor input_33_pad_0 = const()[name = string("input_33_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_33_dilations_0 = const()[name = string("input_33_dilations_0"), val = tensor([1, 1])]; int32 input_33_groups_0 = const()[name = string("input_33_groups_0"), val = int32(1)]; tensor input_33 = conv(dilations = input_33_dilations_0, groups = input_33_groups_0, pad = input_33_pad_0, pad_type = input_33_pad_type_0, strides = input_33_strides_0, weight = model_model_layers_1_mlp_gate_proj_weight_palettized, x = input_31)[name = string("input_33")]; string b_3_pad_type_0 = const()[name = string("b_3_pad_type_0"), val = string("valid")]; tensor b_3_strides_0 = const()[name = string("b_3_strides_0"), val = tensor([1, 1])]; tensor b_3_pad_0 = const()[name = string("b_3_pad_0"), val = tensor([0, 0, 0, 0])]; tensor b_3_dilations_0 = const()[name = string("b_3_dilations_0"), val = tensor([1, 1])]; int32 b_3_groups_0 = const()[name = string("b_3_groups_0"), val = int32(1)]; tensor b_3 = conv(dilations = b_3_dilations_0, groups = b_3_groups_0, pad = b_3_pad_0, pad_type = b_3_pad_type_0, strides = b_3_strides_0, weight = model_model_layers_1_mlp_up_proj_weight_palettized, x = input_31)[name = string("b_3")]; tensor c_3 = silu(x = input_33)[name = string("c_3")]; tensor input_35 = mul(x = c_3, y = b_3)[name = string("input_35")]; string e_3_pad_type_0 = const()[name = string("e_3_pad_type_0"), val = string("valid")]; tensor e_3_strides_0 = const()[name = string("e_3_strides_0"), val = tensor([1, 1])]; tensor e_3_pad_0 = const()[name = string("e_3_pad_0"), val = tensor([0, 0, 0, 0])]; tensor e_3_dilations_0 = const()[name = string("e_3_dilations_0"), val = tensor([1, 1])]; int32 e_3_groups_0 = const()[name = string("e_3_groups_0"), val = int32(1)]; tensor e_3 = conv(dilations = e_3_dilations_0, groups = e_3_groups_0, pad = e_3_pad_0, pad_type = e_3_pad_type_0, strides = e_3_strides_0, weight = model_model_layers_1_mlp_down_proj_weight_palettized, x = input_35)[name = string("e_3")]; tensor var_2554_axes_0 = const()[name = string("op_2554_axes_0"), val = tensor([2])]; tensor var_2554 = squeeze(axes = var_2554_axes_0, x = e_3)[name = string("op_2554")]; tensor var_2555 = const()[name = string("op_2555"), val = tensor([0, 2, 1])]; tensor var_2556 = transpose(perm = var_2555, x = var_2554)[name = string("transpose_234")]; tensor hidden_states_13_cast_fp16 = add(x = hidden_states_11_cast_fp16, y = var_2556)[name = string("hidden_states_13_cast_fp16")]; tensor mean_17_axes_0 = const()[name = string("mean_17_axes_0"), val = tensor([-1])]; bool mean_17_keep_dims_0 = const()[name = string("mean_17_keep_dims_0"), val = bool(true)]; tensor mean_17_cast_fp16 = reduce_mean(axes = mean_17_axes_0, keep_dims = mean_17_keep_dims_0, x = hidden_states_13_cast_fp16)[name = string("mean_17_cast_fp16")]; tensor input_37_cast_fp16 = sub(x = hidden_states_13_cast_fp16, y = mean_17_cast_fp16)[name = string("input_37_cast_fp16")]; tensor var_2574_axes_0 = const()[name = string("op_2574_axes_0"), val = tensor([-1])]; tensor model_model_layers_2_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_2_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1017093056)))]; fp16 var_2562_to_fp16 = const()[name = string("op_2562_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_2574_cast_fp16 = layer_norm(axes = var_2574_axes_0, epsilon = var_2562_to_fp16, gamma = model_model_layers_2_input_layernorm_weight_to_fp16, x = input_37_cast_fp16)[name = string("op_2574_cast_fp16")]; tensor var_2586 = const()[name = string("op_2586"), val = tensor([0, 2, 1])]; tensor var_2589_axes_0 = const()[name = string("op_2589_axes_0"), val = tensor([2])]; tensor var_2587 = transpose(perm = var_2586, x = var_2574_cast_fp16)[name = string("transpose_233")]; tensor var_2589 = expand_dims(axes = var_2589_axes_0, x = var_2587)[name = string("op_2589")]; string query_states_17_pad_type_0 = const()[name = string("query_states_17_pad_type_0"), val = string("valid")]; tensor query_states_17_strides_0 = const()[name = string("query_states_17_strides_0"), val = tensor([1, 1])]; tensor query_states_17_pad_0 = const()[name = string("query_states_17_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_17_dilations_0 = const()[name = string("query_states_17_dilations_0"), val = tensor([1, 1])]; int32 query_states_17_groups_0 = const()[name = string("query_states_17_groups_0"), val = int32(1)]; tensor query_states_17 = conv(dilations = query_states_17_dilations_0, groups = query_states_17_groups_0, pad = query_states_17_pad_0, pad_type = query_states_17_pad_type_0, strides = query_states_17_strides_0, weight = model_model_layers_2_self_attn_q_proj_weight_palettized, x = var_2589)[name = string("query_states_17")]; string key_states_21_pad_type_0 = const()[name = string("key_states_21_pad_type_0"), val = string("valid")]; tensor key_states_21_strides_0 = const()[name = string("key_states_21_strides_0"), val = tensor([1, 1])]; tensor key_states_21_pad_0 = const()[name = string("key_states_21_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_21_dilations_0 = const()[name = string("key_states_21_dilations_0"), val = tensor([1, 1])]; int32 key_states_21_groups_0 = const()[name = string("key_states_21_groups_0"), val = int32(1)]; tensor key_states_21 = conv(dilations = key_states_21_dilations_0, groups = key_states_21_groups_0, pad = key_states_21_pad_0, pad_type = key_states_21_pad_type_0, strides = key_states_21_strides_0, weight = model_model_layers_2_self_attn_k_proj_weight_palettized, x = var_2589)[name = string("key_states_21")]; string value_states_17_pad_type_0 = const()[name = string("value_states_17_pad_type_0"), val = string("valid")]; tensor value_states_17_strides_0 = const()[name = string("value_states_17_strides_0"), val = tensor([1, 1])]; tensor value_states_17_pad_0 = const()[name = string("value_states_17_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_17_dilations_0 = const()[name = string("value_states_17_dilations_0"), val = tensor([1, 1])]; int32 value_states_17_groups_0 = const()[name = string("value_states_17_groups_0"), val = int32(1)]; tensor value_states_17 = conv(dilations = value_states_17_dilations_0, groups = value_states_17_groups_0, pad = value_states_17_pad_0, pad_type = value_states_17_pad_type_0, strides = value_states_17_strides_0, weight = model_model_layers_2_self_attn_v_proj_weight_palettized, x = var_2589)[name = string("value_states_17")]; tensor var_2631 = const()[name = string("op_2631"), val = tensor([1, 16, 128, 64])]; tensor var_2632 = reshape(shape = var_2631, x = query_states_17)[name = string("op_2632")]; tensor var_2637 = const()[name = string("op_2637"), val = tensor([0, 1, 3, 2])]; tensor var_2642 = const()[name = string("op_2642"), val = tensor([1, 8, 128, 64])]; tensor var_2643 = reshape(shape = var_2642, x = key_states_21)[name = string("op_2643")]; tensor var_2648 = const()[name = string("op_2648"), val = tensor([0, 1, 3, 2])]; tensor var_2653 = const()[name = string("op_2653"), val = tensor([1, 8, 128, 64])]; tensor var_2654 = reshape(shape = var_2653, x = value_states_17)[name = string("op_2654")]; tensor var_2659 = const()[name = string("op_2659"), val = tensor([0, 1, 3, 2])]; tensor mean_19_axes_0 = const()[name = string("mean_19_axes_0"), val = tensor([-1])]; bool mean_19_keep_dims_0 = const()[name = string("mean_19_keep_dims_0"), val = bool(true)]; tensor x_41 = transpose(perm = var_2637, x = var_2632)[name = string("transpose_232")]; tensor mean_19 = reduce_mean(axes = mean_19_axes_0, keep_dims = mean_19_keep_dims_0, x = x_41)[name = string("mean_19")]; tensor input_41 = sub(x = x_41, y = mean_19)[name = string("input_41")]; tensor var_2676_axes_0 = const()[name = string("op_2676_axes_0"), val = tensor([-1])]; tensor model_model_layers_2_self_attn_q_norm_weight_to_fp16 = const()[name = string("model_model_layers_2_self_attn_q_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1017097216)))]; fp16 var_2664_to_fp16 = const()[name = string("op_2664_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_2676_cast_fp16 = layer_norm(axes = var_2676_axes_0, epsilon = var_2664_to_fp16, gamma = model_model_layers_2_self_attn_q_norm_weight_to_fp16, x = input_41)[name = string("op_2676_cast_fp16")]; tensor mean_21_axes_0 = const()[name = string("mean_21_axes_0"), val = tensor([-1])]; bool mean_21_keep_dims_0 = const()[name = string("mean_21_keep_dims_0"), val = bool(true)]; tensor x_43 = transpose(perm = var_2648, x = var_2643)[name = string("transpose_231")]; tensor mean_21 = reduce_mean(axes = mean_21_axes_0, keep_dims = mean_21_keep_dims_0, x = x_43)[name = string("mean_21")]; tensor input_43 = sub(x = x_43, y = mean_21)[name = string("input_43")]; tensor var_2694_axes_0 = const()[name = string("op_2694_axes_0"), val = tensor([-1])]; tensor model_model_layers_2_self_attn_k_norm_weight_to_fp16 = const()[name = string("model_model_layers_2_self_attn_k_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1017097536)))]; fp16 var_2682_to_fp16 = const()[name = string("op_2682_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_2694_cast_fp16 = layer_norm(axes = var_2694_axes_0, epsilon = var_2682_to_fp16, gamma = model_model_layers_2_self_attn_k_norm_weight_to_fp16, x = input_43)[name = string("op_2694_cast_fp16")]; tensor var_2709 = mul(x = var_2676_cast_fp16, y = cos_5)[name = string("op_2709")]; tensor x1_9_begin_0 = const()[name = string("x1_9_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_9_end_0 = const()[name = string("x1_9_end_0"), val = tensor([1, 16, 64, 64])]; tensor x1_9_end_mask_0 = const()[name = string("x1_9_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_9 = slice_by_index(begin = x1_9_begin_0, end = x1_9_end_0, end_mask = x1_9_end_mask_0, x = var_2676_cast_fp16)[name = string("x1_9")]; tensor x2_9_begin_0 = const()[name = string("x2_9_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_9_end_0 = const()[name = string("x2_9_end_0"), val = tensor([1, 16, 64, 128])]; tensor x2_9_end_mask_0 = const()[name = string("x2_9_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_9 = slice_by_index(begin = x2_9_begin_0, end = x2_9_end_0, end_mask = x2_9_end_mask_0, x = var_2676_cast_fp16)[name = string("x2_9")]; fp16 const_51_promoted = const()[name = string("const_51_promoted"), val = fp16(-0x1p+0)]; tensor var_2730 = mul(x = x2_9, y = const_51_promoted)[name = string("op_2730")]; int32 var_2732 = const()[name = string("op_2732"), val = int32(-1)]; bool var_2733_interleave_0 = const()[name = string("op_2733_interleave_0"), val = bool(false)]; tensor var_2733 = concat(axis = var_2732, interleave = var_2733_interleave_0, values = (var_2730, x1_9))[name = string("op_2733")]; tensor var_2734 = mul(x = var_2733, y = sin_5)[name = string("op_2734")]; tensor query_states_19 = add(x = var_2709, y = var_2734)[name = string("query_states_19")]; tensor var_2737 = mul(x = var_2694_cast_fp16, y = cos_5)[name = string("op_2737")]; tensor x1_11_begin_0 = const()[name = string("x1_11_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_11_end_0 = const()[name = string("x1_11_end_0"), val = tensor([1, 8, 64, 64])]; tensor x1_11_end_mask_0 = const()[name = string("x1_11_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_11 = slice_by_index(begin = x1_11_begin_0, end = x1_11_end_0, end_mask = x1_11_end_mask_0, x = var_2694_cast_fp16)[name = string("x1_11")]; tensor x2_11_begin_0 = const()[name = string("x2_11_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_11_end_0 = const()[name = string("x2_11_end_0"), val = tensor([1, 8, 64, 128])]; tensor x2_11_end_mask_0 = const()[name = string("x2_11_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_11 = slice_by_index(begin = x2_11_begin_0, end = x2_11_end_0, end_mask = x2_11_end_mask_0, x = var_2694_cast_fp16)[name = string("x2_11")]; fp16 const_54_promoted = const()[name = string("const_54_promoted"), val = fp16(-0x1p+0)]; tensor var_2758 = mul(x = x2_11, y = const_54_promoted)[name = string("op_2758")]; int32 var_2760 = const()[name = string("op_2760"), val = int32(-1)]; bool var_2761_interleave_0 = const()[name = string("op_2761_interleave_0"), val = bool(false)]; tensor var_2761 = concat(axis = var_2760, interleave = var_2761_interleave_0, values = (var_2758, x1_11))[name = string("op_2761")]; tensor var_2762 = mul(x = var_2761, y = sin_5)[name = string("op_2762")]; tensor key_states_23 = add(x = var_2737, y = var_2762)[name = string("key_states_23")]; tensor expand_dims_24 = const()[name = string("expand_dims_24"), val = tensor([2])]; tensor expand_dims_25 = const()[name = string("expand_dims_25"), val = tensor([0])]; tensor expand_dims_27 = const()[name = string("expand_dims_27"), val = tensor([0])]; tensor expand_dims_28 = const()[name = string("expand_dims_28"), val = tensor([3])]; int32 concat_38_axis_0 = const()[name = string("concat_38_axis_0"), val = int32(0)]; bool concat_38_interleave_0 = const()[name = string("concat_38_interleave_0"), val = bool(false)]; tensor concat_38 = concat(axis = concat_38_axis_0, interleave = concat_38_interleave_0, values = (expand_dims_24, expand_dims_25, current_pos, expand_dims_27))[name = string("concat_38")]; tensor concat_39_values1_0 = const()[name = string("concat_39_values1_0"), val = tensor([0])]; tensor concat_39_values3_0 = const()[name = string("concat_39_values3_0"), val = tensor([0])]; int32 concat_39_axis_0 = const()[name = string("concat_39_axis_0"), val = int32(0)]; bool concat_39_interleave_0 = const()[name = string("concat_39_interleave_0"), val = bool(false)]; tensor concat_39 = concat(axis = concat_39_axis_0, interleave = concat_39_interleave_0, values = (expand_dims_28, concat_39_values1_0, var_1760, concat_39_values3_0))[name = string("concat_39")]; tensor model_model_kv_cache_0_internal_tensor_assign_5_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_5_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_5_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_5_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_5_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_5_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_5_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_5_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_5_cast_fp16 = slice_update(begin = concat_38, begin_mask = model_model_kv_cache_0_internal_tensor_assign_5_begin_mask_0, end = concat_39, end_mask = model_model_kv_cache_0_internal_tensor_assign_5_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_5_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_5_stride_0, update = key_states_23, x = coreml_update_state_59)[name = string("model_model_kv_cache_0_internal_tensor_assign_5_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_5_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_60_write_state")]; tensor coreml_update_state_60 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_60")]; tensor expand_dims_30 = const()[name = string("expand_dims_30"), val = tensor([30])]; tensor expand_dims_31 = const()[name = string("expand_dims_31"), val = tensor([0])]; tensor expand_dims_33 = const()[name = string("expand_dims_33"), val = tensor([0])]; tensor expand_dims_34 = const()[name = string("expand_dims_34"), val = tensor([31])]; int32 concat_42_axis_0 = const()[name = string("concat_42_axis_0"), val = int32(0)]; bool concat_42_interleave_0 = const()[name = string("concat_42_interleave_0"), val = bool(false)]; tensor concat_42 = concat(axis = concat_42_axis_0, interleave = concat_42_interleave_0, values = (expand_dims_30, expand_dims_31, current_pos, expand_dims_33))[name = string("concat_42")]; tensor concat_43_values1_0 = const()[name = string("concat_43_values1_0"), val = tensor([0])]; tensor concat_43_values3_0 = const()[name = string("concat_43_values3_0"), val = tensor([0])]; int32 concat_43_axis_0 = const()[name = string("concat_43_axis_0"), val = int32(0)]; bool concat_43_interleave_0 = const()[name = string("concat_43_interleave_0"), val = bool(false)]; tensor concat_43 = concat(axis = concat_43_axis_0, interleave = concat_43_interleave_0, values = (expand_dims_34, concat_43_values1_0, var_1760, concat_43_values3_0))[name = string("concat_43")]; tensor model_model_kv_cache_0_internal_tensor_assign_6_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_6_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_6_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_6_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_6_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_6_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_6_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_6_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_19 = transpose(perm = var_2659, x = var_2654)[name = string("transpose_230")]; tensor model_model_kv_cache_0_internal_tensor_assign_6_cast_fp16 = slice_update(begin = concat_42, begin_mask = model_model_kv_cache_0_internal_tensor_assign_6_begin_mask_0, end = concat_43, end_mask = model_model_kv_cache_0_internal_tensor_assign_6_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_6_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_6_stride_0, update = value_states_19, x = coreml_update_state_60)[name = string("model_model_kv_cache_0_internal_tensor_assign_6_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_6_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_61_write_state")]; tensor coreml_update_state_61 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_61")]; tensor var_2833_begin_0 = const()[name = string("op_2833_begin_0"), val = tensor([2, 0, 0, 0])]; tensor var_2833_end_0 = const()[name = string("op_2833_end_0"), val = tensor([3, 8, 1024, 128])]; tensor var_2833_end_mask_0 = const()[name = string("op_2833_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_2833_cast_fp16 = slice_by_index(begin = var_2833_begin_0, end = var_2833_end_0, end_mask = var_2833_end_mask_0, x = coreml_update_state_61)[name = string("op_2833_cast_fp16")]; tensor K_layer_cache_5_axes_0 = const()[name = string("K_layer_cache_5_axes_0"), val = tensor([0])]; tensor K_layer_cache_5_cast_fp16 = squeeze(axes = K_layer_cache_5_axes_0, x = var_2833_cast_fp16)[name = string("K_layer_cache_5_cast_fp16")]; tensor var_2840_begin_0 = const()[name = string("op_2840_begin_0"), val = tensor([30, 0, 0, 0])]; tensor var_2840_end_0 = const()[name = string("op_2840_end_0"), val = tensor([31, 8, 1024, 128])]; tensor var_2840_end_mask_0 = const()[name = string("op_2840_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_2840_cast_fp16 = slice_by_index(begin = var_2840_begin_0, end = var_2840_end_0, end_mask = var_2840_end_mask_0, x = coreml_update_state_61)[name = string("op_2840_cast_fp16")]; tensor V_layer_cache_5_axes_0 = const()[name = string("V_layer_cache_5_axes_0"), val = tensor([0])]; tensor V_layer_cache_5_cast_fp16 = squeeze(axes = V_layer_cache_5_axes_0, x = var_2840_cast_fp16)[name = string("V_layer_cache_5_cast_fp16")]; tensor x_47_axes_0 = const()[name = string("x_47_axes_0"), val = tensor([1])]; tensor x_47_cast_fp16 = expand_dims(axes = x_47_axes_0, x = K_layer_cache_5_cast_fp16)[name = string("x_47_cast_fp16")]; tensor var_2869 = const()[name = string("op_2869"), val = tensor([1, 2, 1, 1])]; tensor x_49_cast_fp16 = tile(reps = var_2869, x = x_47_cast_fp16)[name = string("x_49_cast_fp16")]; tensor var_2881 = const()[name = string("op_2881"), val = tensor([1, -1, 1024, 128])]; tensor key_states_27_cast_fp16 = reshape(shape = var_2881, x = x_49_cast_fp16)[name = string("key_states_27_cast_fp16")]; tensor x_53_axes_0 = const()[name = string("x_53_axes_0"), val = tensor([1])]; tensor x_53_cast_fp16 = expand_dims(axes = x_53_axes_0, x = V_layer_cache_5_cast_fp16)[name = string("x_53_cast_fp16")]; tensor var_2889 = const()[name = string("op_2889"), val = tensor([1, 2, 1, 1])]; tensor x_55_cast_fp16 = tile(reps = var_2889, x = x_53_cast_fp16)[name = string("x_55_cast_fp16")]; bool var_2916_transpose_x_0 = const()[name = string("op_2916_transpose_x_0"), val = bool(false)]; bool var_2916_transpose_y_0 = const()[name = string("op_2916_transpose_y_0"), val = bool(true)]; tensor var_2916 = matmul(transpose_x = var_2916_transpose_x_0, transpose_y = var_2916_transpose_y_0, x = query_states_19, y = key_states_27_cast_fp16)[name = string("op_2916")]; fp16 var_2917_to_fp16 = const()[name = string("op_2917_to_fp16"), val = fp16(0x1.6ap-4)]; tensor attn_weights_9_cast_fp16 = mul(x = var_2916, y = var_2917_to_fp16)[name = string("attn_weights_9_cast_fp16")]; tensor attn_weights_11_cast_fp16 = add(x = attn_weights_9_cast_fp16, y = causal_mask)[name = string("attn_weights_11_cast_fp16")]; int32 var_2952 = const()[name = string("op_2952"), val = int32(-1)]; tensor var_2954_cast_fp16 = softmax(axis = var_2952, x = attn_weights_11_cast_fp16)[name = string("op_2954_cast_fp16")]; tensor concat_48 = const()[name = string("concat_48"), val = tensor([16, 64, 1024])]; tensor reshape_6_cast_fp16 = reshape(shape = concat_48, x = var_2954_cast_fp16)[name = string("reshape_6_cast_fp16")]; tensor concat_49 = const()[name = string("concat_49"), val = tensor([16, 1024, 128])]; tensor reshape_7_cast_fp16 = reshape(shape = concat_49, x = x_55_cast_fp16)[name = string("reshape_7_cast_fp16")]; bool matmul_2_transpose_x_0 = const()[name = string("matmul_2_transpose_x_0"), val = bool(false)]; bool matmul_2_transpose_y_0 = const()[name = string("matmul_2_transpose_y_0"), val = bool(false)]; tensor matmul_2_cast_fp16 = matmul(transpose_x = matmul_2_transpose_x_0, transpose_y = matmul_2_transpose_y_0, x = reshape_6_cast_fp16, y = reshape_7_cast_fp16)[name = string("matmul_2_cast_fp16")]; tensor concat_53 = const()[name = string("concat_53"), val = tensor([1, 16, 64, 128])]; tensor reshape_8_cast_fp16 = reshape(shape = concat_53, x = matmul_2_cast_fp16)[name = string("reshape_8_cast_fp16")]; tensor var_2966_perm_0 = const()[name = string("op_2966_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_2985 = const()[name = string("op_2985"), val = tensor([1, 64, 2048])]; tensor var_2966_cast_fp16 = transpose(perm = var_2966_perm_0, x = reshape_8_cast_fp16)[name = string("transpose_229")]; tensor attn_output_25_cast_fp16 = reshape(shape = var_2985, x = var_2966_cast_fp16)[name = string("attn_output_25_cast_fp16")]; tensor var_2990 = const()[name = string("op_2990"), val = tensor([0, 2, 1])]; string var_3006_pad_type_0 = const()[name = string("op_3006_pad_type_0"), val = string("valid")]; int32 var_3006_groups_0 = const()[name = string("op_3006_groups_0"), val = int32(1)]; tensor var_3006_strides_0 = const()[name = string("op_3006_strides_0"), val = tensor([1])]; tensor var_3006_pad_0 = const()[name = string("op_3006_pad_0"), val = tensor([0, 0])]; tensor var_3006_dilations_0 = const()[name = string("op_3006_dilations_0"), val = tensor([1])]; tensor squeeze_2_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1017097856))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1020243648))))[name = string("squeeze_2_cast_fp16_to_fp32_to_fp16_palettized")]; tensor var_2991_cast_fp16 = transpose(perm = var_2990, x = attn_output_25_cast_fp16)[name = string("transpose_228")]; tensor var_3006_cast_fp16 = conv(dilations = var_3006_dilations_0, groups = var_3006_groups_0, pad = var_3006_pad_0, pad_type = var_3006_pad_type_0, strides = var_3006_strides_0, weight = squeeze_2_cast_fp16_to_fp32_to_fp16_palettized, x = var_2991_cast_fp16)[name = string("op_3006_cast_fp16")]; tensor var_3010 = const()[name = string("op_3010"), val = tensor([0, 2, 1])]; tensor attn_output_29_cast_fp16 = transpose(perm = var_3010, x = var_3006_cast_fp16)[name = string("transpose_227")]; tensor hidden_states_17_cast_fp16 = add(x = hidden_states_13_cast_fp16, y = attn_output_29_cast_fp16)[name = string("hidden_states_17_cast_fp16")]; tensor mean_23_axes_0 = const()[name = string("mean_23_axes_0"), val = tensor([-1])]; bool mean_23_keep_dims_0 = const()[name = string("mean_23_keep_dims_0"), val = bool(true)]; tensor mean_23_cast_fp16 = reduce_mean(axes = mean_23_axes_0, keep_dims = mean_23_keep_dims_0, x = hidden_states_17_cast_fp16)[name = string("mean_23_cast_fp16")]; tensor input_47_cast_fp16 = sub(x = hidden_states_17_cast_fp16, y = mean_23_cast_fp16)[name = string("input_47_cast_fp16")]; tensor var_3029_axes_0 = const()[name = string("op_3029_axes_0"), val = tensor([-1])]; tensor model_model_layers_2_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_2_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1020276480)))]; fp16 var_3017_to_fp16 = const()[name = string("op_3017_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_3029_cast_fp16 = layer_norm(axes = var_3029_axes_0, epsilon = var_3017_to_fp16, gamma = model_model_layers_2_post_attention_layernorm_weight_to_fp16, x = input_47_cast_fp16)[name = string("op_3029_cast_fp16")]; tensor var_3043 = const()[name = string("op_3043"), val = tensor([0, 2, 1])]; tensor input_49_axes_0 = const()[name = string("input_49_axes_0"), val = tensor([2])]; tensor var_3044 = transpose(perm = var_3043, x = var_3029_cast_fp16)[name = string("transpose_226")]; tensor input_49 = expand_dims(axes = input_49_axes_0, x = var_3044)[name = string("input_49")]; string input_51_pad_type_0 = const()[name = string("input_51_pad_type_0"), val = string("valid")]; tensor input_51_strides_0 = const()[name = string("input_51_strides_0"), val = tensor([1, 1])]; tensor input_51_pad_0 = const()[name = string("input_51_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_51_dilations_0 = const()[name = string("input_51_dilations_0"), val = tensor([1, 1])]; int32 input_51_groups_0 = const()[name = string("input_51_groups_0"), val = int32(1)]; tensor input_51 = conv(dilations = input_51_dilations_0, groups = input_51_groups_0, pad = input_51_pad_0, pad_type = input_51_pad_type_0, strides = input_51_strides_0, weight = model_model_layers_2_mlp_gate_proj_weight_palettized, x = input_49)[name = string("input_51")]; string b_5_pad_type_0 = const()[name = string("b_5_pad_type_0"), val = string("valid")]; tensor b_5_strides_0 = const()[name = string("b_5_strides_0"), val = tensor([1, 1])]; tensor b_5_pad_0 = const()[name = string("b_5_pad_0"), val = tensor([0, 0, 0, 0])]; tensor b_5_dilations_0 = const()[name = string("b_5_dilations_0"), val = tensor([1, 1])]; int32 b_5_groups_0 = const()[name = string("b_5_groups_0"), val = int32(1)]; tensor b_5 = conv(dilations = b_5_dilations_0, groups = b_5_groups_0, pad = b_5_pad_0, pad_type = b_5_pad_type_0, strides = b_5_strides_0, weight = model_model_layers_2_mlp_up_proj_weight_palettized, x = input_49)[name = string("b_5")]; tensor c_5 = silu(x = input_51)[name = string("c_5")]; tensor input_53 = mul(x = c_5, y = b_5)[name = string("input_53")]; string e_5_pad_type_0 = const()[name = string("e_5_pad_type_0"), val = string("valid")]; tensor e_5_strides_0 = const()[name = string("e_5_strides_0"), val = tensor([1, 1])]; tensor e_5_pad_0 = const()[name = string("e_5_pad_0"), val = tensor([0, 0, 0, 0])]; tensor e_5_dilations_0 = const()[name = string("e_5_dilations_0"), val = tensor([1, 1])]; int32 e_5_groups_0 = const()[name = string("e_5_groups_0"), val = int32(1)]; tensor e_5 = conv(dilations = e_5_dilations_0, groups = e_5_groups_0, pad = e_5_pad_0, pad_type = e_5_pad_type_0, strides = e_5_strides_0, weight = model_model_layers_2_mlp_down_proj_weight_palettized, x = input_53)[name = string("e_5")]; tensor var_3066_axes_0 = const()[name = string("op_3066_axes_0"), val = tensor([2])]; tensor var_3066 = squeeze(axes = var_3066_axes_0, x = e_5)[name = string("op_3066")]; tensor var_3067 = const()[name = string("op_3067"), val = tensor([0, 2, 1])]; tensor var_3068 = transpose(perm = var_3067, x = var_3066)[name = string("transpose_225")]; tensor hidden_states_19_cast_fp16 = add(x = hidden_states_17_cast_fp16, y = var_3068)[name = string("hidden_states_19_cast_fp16")]; tensor mean_25_axes_0 = const()[name = string("mean_25_axes_0"), val = tensor([-1])]; bool mean_25_keep_dims_0 = const()[name = string("mean_25_keep_dims_0"), val = bool(true)]; tensor mean_25_cast_fp16 = reduce_mean(axes = mean_25_axes_0, keep_dims = mean_25_keep_dims_0, x = hidden_states_19_cast_fp16)[name = string("mean_25_cast_fp16")]; tensor input_55_cast_fp16 = sub(x = hidden_states_19_cast_fp16, y = mean_25_cast_fp16)[name = string("input_55_cast_fp16")]; tensor var_3086_axes_0 = const()[name = string("op_3086_axes_0"), val = tensor([-1])]; tensor model_model_layers_3_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_3_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1020280640)))]; fp16 var_3074_to_fp16 = const()[name = string("op_3074_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_3086_cast_fp16 = layer_norm(axes = var_3086_axes_0, epsilon = var_3074_to_fp16, gamma = model_model_layers_3_input_layernorm_weight_to_fp16, x = input_55_cast_fp16)[name = string("op_3086_cast_fp16")]; tensor var_3098 = const()[name = string("op_3098"), val = tensor([0, 2, 1])]; tensor var_3101_axes_0 = const()[name = string("op_3101_axes_0"), val = tensor([2])]; tensor var_3099 = transpose(perm = var_3098, x = var_3086_cast_fp16)[name = string("transpose_224")]; tensor var_3101 = expand_dims(axes = var_3101_axes_0, x = var_3099)[name = string("op_3101")]; string query_states_25_pad_type_0 = const()[name = string("query_states_25_pad_type_0"), val = string("valid")]; tensor query_states_25_strides_0 = const()[name = string("query_states_25_strides_0"), val = tensor([1, 1])]; tensor query_states_25_pad_0 = const()[name = string("query_states_25_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_25_dilations_0 = const()[name = string("query_states_25_dilations_0"), val = tensor([1, 1])]; int32 query_states_25_groups_0 = const()[name = string("query_states_25_groups_0"), val = int32(1)]; tensor query_states_25 = conv(dilations = query_states_25_dilations_0, groups = query_states_25_groups_0, pad = query_states_25_pad_0, pad_type = query_states_25_pad_type_0, strides = query_states_25_strides_0, weight = model_model_layers_3_self_attn_q_proj_weight_palettized, x = var_3101)[name = string("query_states_25")]; string key_states_31_pad_type_0 = const()[name = string("key_states_31_pad_type_0"), val = string("valid")]; tensor key_states_31_strides_0 = const()[name = string("key_states_31_strides_0"), val = tensor([1, 1])]; tensor key_states_31_pad_0 = const()[name = string("key_states_31_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_31_dilations_0 = const()[name = string("key_states_31_dilations_0"), val = tensor([1, 1])]; int32 key_states_31_groups_0 = const()[name = string("key_states_31_groups_0"), val = int32(1)]; tensor key_states_31 = conv(dilations = key_states_31_dilations_0, groups = key_states_31_groups_0, pad = key_states_31_pad_0, pad_type = key_states_31_pad_type_0, strides = key_states_31_strides_0, weight = model_model_layers_3_self_attn_k_proj_weight_palettized, x = var_3101)[name = string("key_states_31")]; string value_states_25_pad_type_0 = const()[name = string("value_states_25_pad_type_0"), val = string("valid")]; tensor value_states_25_strides_0 = const()[name = string("value_states_25_strides_0"), val = tensor([1, 1])]; tensor value_states_25_pad_0 = const()[name = string("value_states_25_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_25_dilations_0 = const()[name = string("value_states_25_dilations_0"), val = tensor([1, 1])]; int32 value_states_25_groups_0 = const()[name = string("value_states_25_groups_0"), val = int32(1)]; tensor value_states_25 = conv(dilations = value_states_25_dilations_0, groups = value_states_25_groups_0, pad = value_states_25_pad_0, pad_type = value_states_25_pad_type_0, strides = value_states_25_strides_0, weight = model_model_layers_3_self_attn_v_proj_weight_palettized, x = var_3101)[name = string("value_states_25")]; tensor var_3143 = const()[name = string("op_3143"), val = tensor([1, 16, 128, 64])]; tensor var_3144 = reshape(shape = var_3143, x = query_states_25)[name = string("op_3144")]; tensor var_3149 = const()[name = string("op_3149"), val = tensor([0, 1, 3, 2])]; tensor var_3154 = const()[name = string("op_3154"), val = tensor([1, 8, 128, 64])]; tensor var_3155 = reshape(shape = var_3154, x = key_states_31)[name = string("op_3155")]; tensor var_3160 = const()[name = string("op_3160"), val = tensor([0, 1, 3, 2])]; tensor var_3165 = const()[name = string("op_3165"), val = tensor([1, 8, 128, 64])]; tensor var_3166 = reshape(shape = var_3165, x = value_states_25)[name = string("op_3166")]; tensor var_3171 = const()[name = string("op_3171"), val = tensor([0, 1, 3, 2])]; tensor mean_27_axes_0 = const()[name = string("mean_27_axes_0"), val = tensor([-1])]; bool mean_27_keep_dims_0 = const()[name = string("mean_27_keep_dims_0"), val = bool(true)]; tensor x_61 = transpose(perm = var_3149, x = var_3144)[name = string("transpose_223")]; tensor mean_27 = reduce_mean(axes = mean_27_axes_0, keep_dims = mean_27_keep_dims_0, x = x_61)[name = string("mean_27")]; tensor input_59 = sub(x = x_61, y = mean_27)[name = string("input_59")]; tensor var_3188_axes_0 = const()[name = string("op_3188_axes_0"), val = tensor([-1])]; tensor model_model_layers_3_self_attn_q_norm_weight_to_fp16 = const()[name = string("model_model_layers_3_self_attn_q_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1020284800)))]; fp16 var_3176_to_fp16 = const()[name = string("op_3176_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_3188_cast_fp16 = layer_norm(axes = var_3188_axes_0, epsilon = var_3176_to_fp16, gamma = model_model_layers_3_self_attn_q_norm_weight_to_fp16, x = input_59)[name = string("op_3188_cast_fp16")]; tensor mean_29_axes_0 = const()[name = string("mean_29_axes_0"), val = tensor([-1])]; bool mean_29_keep_dims_0 = const()[name = string("mean_29_keep_dims_0"), val = bool(true)]; tensor x_63 = transpose(perm = var_3160, x = var_3155)[name = string("transpose_222")]; tensor mean_29 = reduce_mean(axes = mean_29_axes_0, keep_dims = mean_29_keep_dims_0, x = x_63)[name = string("mean_29")]; tensor input_61 = sub(x = x_63, y = mean_29)[name = string("input_61")]; tensor var_3206_axes_0 = const()[name = string("op_3206_axes_0"), val = tensor([-1])]; tensor model_model_layers_3_self_attn_k_norm_weight_to_fp16 = const()[name = string("model_model_layers_3_self_attn_k_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1020285120)))]; fp16 var_3194_to_fp16 = const()[name = string("op_3194_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_3206_cast_fp16 = layer_norm(axes = var_3206_axes_0, epsilon = var_3194_to_fp16, gamma = model_model_layers_3_self_attn_k_norm_weight_to_fp16, x = input_61)[name = string("op_3206_cast_fp16")]; tensor var_3221 = mul(x = var_3188_cast_fp16, y = cos_5)[name = string("op_3221")]; tensor x1_13_begin_0 = const()[name = string("x1_13_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_13_end_0 = const()[name = string("x1_13_end_0"), val = tensor([1, 16, 64, 64])]; tensor x1_13_end_mask_0 = const()[name = string("x1_13_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_13 = slice_by_index(begin = x1_13_begin_0, end = x1_13_end_0, end_mask = x1_13_end_mask_0, x = var_3188_cast_fp16)[name = string("x1_13")]; tensor x2_13_begin_0 = const()[name = string("x2_13_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_13_end_0 = const()[name = string("x2_13_end_0"), val = tensor([1, 16, 64, 128])]; tensor x2_13_end_mask_0 = const()[name = string("x2_13_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_13 = slice_by_index(begin = x2_13_begin_0, end = x2_13_end_0, end_mask = x2_13_end_mask_0, x = var_3188_cast_fp16)[name = string("x2_13")]; fp16 const_73_promoted = const()[name = string("const_73_promoted"), val = fp16(-0x1p+0)]; tensor var_3242 = mul(x = x2_13, y = const_73_promoted)[name = string("op_3242")]; int32 var_3244 = const()[name = string("op_3244"), val = int32(-1)]; bool var_3245_interleave_0 = const()[name = string("op_3245_interleave_0"), val = bool(false)]; tensor var_3245 = concat(axis = var_3244, interleave = var_3245_interleave_0, values = (var_3242, x1_13))[name = string("op_3245")]; tensor var_3246 = mul(x = var_3245, y = sin_5)[name = string("op_3246")]; tensor query_states_27 = add(x = var_3221, y = var_3246)[name = string("query_states_27")]; tensor var_3249 = mul(x = var_3206_cast_fp16, y = cos_5)[name = string("op_3249")]; tensor x1_15_begin_0 = const()[name = string("x1_15_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_15_end_0 = const()[name = string("x1_15_end_0"), val = tensor([1, 8, 64, 64])]; tensor x1_15_end_mask_0 = const()[name = string("x1_15_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_15 = slice_by_index(begin = x1_15_begin_0, end = x1_15_end_0, end_mask = x1_15_end_mask_0, x = var_3206_cast_fp16)[name = string("x1_15")]; tensor x2_15_begin_0 = const()[name = string("x2_15_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_15_end_0 = const()[name = string("x2_15_end_0"), val = tensor([1, 8, 64, 128])]; tensor x2_15_end_mask_0 = const()[name = string("x2_15_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_15 = slice_by_index(begin = x2_15_begin_0, end = x2_15_end_0, end_mask = x2_15_end_mask_0, x = var_3206_cast_fp16)[name = string("x2_15")]; fp16 const_76_promoted = const()[name = string("const_76_promoted"), val = fp16(-0x1p+0)]; tensor var_3270 = mul(x = x2_15, y = const_76_promoted)[name = string("op_3270")]; int32 var_3272 = const()[name = string("op_3272"), val = int32(-1)]; bool var_3273_interleave_0 = const()[name = string("op_3273_interleave_0"), val = bool(false)]; tensor var_3273 = concat(axis = var_3272, interleave = var_3273_interleave_0, values = (var_3270, x1_15))[name = string("op_3273")]; tensor var_3274 = mul(x = var_3273, y = sin_5)[name = string("op_3274")]; tensor key_states_33 = add(x = var_3249, y = var_3274)[name = string("key_states_33")]; tensor expand_dims_36 = const()[name = string("expand_dims_36"), val = tensor([3])]; tensor expand_dims_37 = const()[name = string("expand_dims_37"), val = tensor([0])]; tensor expand_dims_39 = const()[name = string("expand_dims_39"), val = tensor([0])]; tensor expand_dims_40 = const()[name = string("expand_dims_40"), val = tensor([4])]; int32 concat_56_axis_0 = const()[name = string("concat_56_axis_0"), val = int32(0)]; bool concat_56_interleave_0 = const()[name = string("concat_56_interleave_0"), val = bool(false)]; tensor concat_56 = concat(axis = concat_56_axis_0, interleave = concat_56_interleave_0, values = (expand_dims_36, expand_dims_37, current_pos, expand_dims_39))[name = string("concat_56")]; tensor concat_57_values1_0 = const()[name = string("concat_57_values1_0"), val = tensor([0])]; tensor concat_57_values3_0 = const()[name = string("concat_57_values3_0"), val = tensor([0])]; int32 concat_57_axis_0 = const()[name = string("concat_57_axis_0"), val = int32(0)]; bool concat_57_interleave_0 = const()[name = string("concat_57_interleave_0"), val = bool(false)]; tensor concat_57 = concat(axis = concat_57_axis_0, interleave = concat_57_interleave_0, values = (expand_dims_40, concat_57_values1_0, var_1760, concat_57_values3_0))[name = string("concat_57")]; tensor model_model_kv_cache_0_internal_tensor_assign_7_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_7_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_7_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_7_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_7_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_7_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_7_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_7_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_7_cast_fp16 = slice_update(begin = concat_56, begin_mask = model_model_kv_cache_0_internal_tensor_assign_7_begin_mask_0, end = concat_57, end_mask = model_model_kv_cache_0_internal_tensor_assign_7_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_7_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_7_stride_0, update = key_states_33, x = coreml_update_state_61)[name = string("model_model_kv_cache_0_internal_tensor_assign_7_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_7_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_62_write_state")]; tensor coreml_update_state_62 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_62")]; tensor expand_dims_42 = const()[name = string("expand_dims_42"), val = tensor([31])]; tensor expand_dims_43 = const()[name = string("expand_dims_43"), val = tensor([0])]; tensor expand_dims_45 = const()[name = string("expand_dims_45"), val = tensor([0])]; tensor expand_dims_46 = const()[name = string("expand_dims_46"), val = tensor([32])]; int32 concat_60_axis_0 = const()[name = string("concat_60_axis_0"), val = int32(0)]; bool concat_60_interleave_0 = const()[name = string("concat_60_interleave_0"), val = bool(false)]; tensor concat_60 = concat(axis = concat_60_axis_0, interleave = concat_60_interleave_0, values = (expand_dims_42, expand_dims_43, current_pos, expand_dims_45))[name = string("concat_60")]; tensor concat_61_values1_0 = const()[name = string("concat_61_values1_0"), val = tensor([0])]; tensor concat_61_values3_0 = const()[name = string("concat_61_values3_0"), val = tensor([0])]; int32 concat_61_axis_0 = const()[name = string("concat_61_axis_0"), val = int32(0)]; bool concat_61_interleave_0 = const()[name = string("concat_61_interleave_0"), val = bool(false)]; tensor concat_61 = concat(axis = concat_61_axis_0, interleave = concat_61_interleave_0, values = (expand_dims_46, concat_61_values1_0, var_1760, concat_61_values3_0))[name = string("concat_61")]; tensor model_model_kv_cache_0_internal_tensor_assign_8_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_8_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_8_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_8_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_8_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_8_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_8_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_8_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_27 = transpose(perm = var_3171, x = var_3166)[name = string("transpose_221")]; tensor model_model_kv_cache_0_internal_tensor_assign_8_cast_fp16 = slice_update(begin = concat_60, begin_mask = model_model_kv_cache_0_internal_tensor_assign_8_begin_mask_0, end = concat_61, end_mask = model_model_kv_cache_0_internal_tensor_assign_8_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_8_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_8_stride_0, update = value_states_27, x = coreml_update_state_62)[name = string("model_model_kv_cache_0_internal_tensor_assign_8_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_8_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_63_write_state")]; tensor coreml_update_state_63 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_63")]; tensor var_3345_begin_0 = const()[name = string("op_3345_begin_0"), val = tensor([3, 0, 0, 0])]; tensor var_3345_end_0 = const()[name = string("op_3345_end_0"), val = tensor([4, 8, 1024, 128])]; tensor var_3345_end_mask_0 = const()[name = string("op_3345_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_3345_cast_fp16 = slice_by_index(begin = var_3345_begin_0, end = var_3345_end_0, end_mask = var_3345_end_mask_0, x = coreml_update_state_63)[name = string("op_3345_cast_fp16")]; tensor K_layer_cache_7_axes_0 = const()[name = string("K_layer_cache_7_axes_0"), val = tensor([0])]; tensor K_layer_cache_7_cast_fp16 = squeeze(axes = K_layer_cache_7_axes_0, x = var_3345_cast_fp16)[name = string("K_layer_cache_7_cast_fp16")]; tensor var_3352_begin_0 = const()[name = string("op_3352_begin_0"), val = tensor([31, 0, 0, 0])]; tensor var_3352_end_0 = const()[name = string("op_3352_end_0"), val = tensor([32, 8, 1024, 128])]; tensor var_3352_end_mask_0 = const()[name = string("op_3352_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_3352_cast_fp16 = slice_by_index(begin = var_3352_begin_0, end = var_3352_end_0, end_mask = var_3352_end_mask_0, x = coreml_update_state_63)[name = string("op_3352_cast_fp16")]; tensor V_layer_cache_7_axes_0 = const()[name = string("V_layer_cache_7_axes_0"), val = tensor([0])]; tensor V_layer_cache_7_cast_fp16 = squeeze(axes = V_layer_cache_7_axes_0, x = var_3352_cast_fp16)[name = string("V_layer_cache_7_cast_fp16")]; tensor x_67_axes_0 = const()[name = string("x_67_axes_0"), val = tensor([1])]; tensor x_67_cast_fp16 = expand_dims(axes = x_67_axes_0, x = K_layer_cache_7_cast_fp16)[name = string("x_67_cast_fp16")]; tensor var_3381 = const()[name = string("op_3381"), val = tensor([1, 2, 1, 1])]; tensor x_69_cast_fp16 = tile(reps = var_3381, x = x_67_cast_fp16)[name = string("x_69_cast_fp16")]; tensor var_3393 = const()[name = string("op_3393"), val = tensor([1, -1, 1024, 128])]; tensor key_states_37_cast_fp16 = reshape(shape = var_3393, x = x_69_cast_fp16)[name = string("key_states_37_cast_fp16")]; tensor x_73_axes_0 = const()[name = string("x_73_axes_0"), val = tensor([1])]; tensor x_73_cast_fp16 = expand_dims(axes = x_73_axes_0, x = V_layer_cache_7_cast_fp16)[name = string("x_73_cast_fp16")]; tensor var_3401 = const()[name = string("op_3401"), val = tensor([1, 2, 1, 1])]; tensor x_75_cast_fp16 = tile(reps = var_3401, x = x_73_cast_fp16)[name = string("x_75_cast_fp16")]; bool var_3428_transpose_x_0 = const()[name = string("op_3428_transpose_x_0"), val = bool(false)]; bool var_3428_transpose_y_0 = const()[name = string("op_3428_transpose_y_0"), val = bool(true)]; tensor var_3428 = matmul(transpose_x = var_3428_transpose_x_0, transpose_y = var_3428_transpose_y_0, x = query_states_27, y = key_states_37_cast_fp16)[name = string("op_3428")]; fp16 var_3429_to_fp16 = const()[name = string("op_3429_to_fp16"), val = fp16(0x1.6ap-4)]; tensor attn_weights_13_cast_fp16 = mul(x = var_3428, y = var_3429_to_fp16)[name = string("attn_weights_13_cast_fp16")]; tensor attn_weights_15_cast_fp16 = add(x = attn_weights_13_cast_fp16, y = causal_mask)[name = string("attn_weights_15_cast_fp16")]; int32 var_3464 = const()[name = string("op_3464"), val = int32(-1)]; tensor var_3466_cast_fp16 = softmax(axis = var_3464, x = attn_weights_15_cast_fp16)[name = string("op_3466_cast_fp16")]; tensor concat_66 = const()[name = string("concat_66"), val = tensor([16, 64, 1024])]; tensor reshape_9_cast_fp16 = reshape(shape = concat_66, x = var_3466_cast_fp16)[name = string("reshape_9_cast_fp16")]; tensor concat_67 = const()[name = string("concat_67"), val = tensor([16, 1024, 128])]; tensor reshape_10_cast_fp16 = reshape(shape = concat_67, x = x_75_cast_fp16)[name = string("reshape_10_cast_fp16")]; bool matmul_3_transpose_x_0 = const()[name = string("matmul_3_transpose_x_0"), val = bool(false)]; bool matmul_3_transpose_y_0 = const()[name = string("matmul_3_transpose_y_0"), val = bool(false)]; tensor matmul_3_cast_fp16 = matmul(transpose_x = matmul_3_transpose_x_0, transpose_y = matmul_3_transpose_y_0, x = reshape_9_cast_fp16, y = reshape_10_cast_fp16)[name = string("matmul_3_cast_fp16")]; tensor concat_71 = const()[name = string("concat_71"), val = tensor([1, 16, 64, 128])]; tensor reshape_11_cast_fp16 = reshape(shape = concat_71, x = matmul_3_cast_fp16)[name = string("reshape_11_cast_fp16")]; tensor var_3478_perm_0 = const()[name = string("op_3478_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_3497 = const()[name = string("op_3497"), val = tensor([1, 64, 2048])]; tensor var_3478_cast_fp16 = transpose(perm = var_3478_perm_0, x = reshape_11_cast_fp16)[name = string("transpose_220")]; tensor attn_output_35_cast_fp16 = reshape(shape = var_3497, x = var_3478_cast_fp16)[name = string("attn_output_35_cast_fp16")]; tensor var_3502 = const()[name = string("op_3502"), val = tensor([0, 2, 1])]; string var_3518_pad_type_0 = const()[name = string("op_3518_pad_type_0"), val = string("valid")]; int32 var_3518_groups_0 = const()[name = string("op_3518_groups_0"), val = int32(1)]; tensor var_3518_strides_0 = const()[name = string("op_3518_strides_0"), val = tensor([1])]; tensor var_3518_pad_0 = const()[name = string("op_3518_pad_0"), val = tensor([0, 0])]; tensor var_3518_dilations_0 = const()[name = string("op_3518_dilations_0"), val = tensor([1])]; tensor squeeze_3_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1020285440))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1023431232))))[name = string("squeeze_3_cast_fp16_to_fp32_to_fp16_palettized")]; tensor var_3503_cast_fp16 = transpose(perm = var_3502, x = attn_output_35_cast_fp16)[name = string("transpose_219")]; tensor var_3518_cast_fp16 = conv(dilations = var_3518_dilations_0, groups = var_3518_groups_0, pad = var_3518_pad_0, pad_type = var_3518_pad_type_0, strides = var_3518_strides_0, weight = squeeze_3_cast_fp16_to_fp32_to_fp16_palettized, x = var_3503_cast_fp16)[name = string("op_3518_cast_fp16")]; tensor var_3522 = const()[name = string("op_3522"), val = tensor([0, 2, 1])]; tensor attn_output_39_cast_fp16 = transpose(perm = var_3522, x = var_3518_cast_fp16)[name = string("transpose_218")]; tensor hidden_states_23_cast_fp16 = add(x = hidden_states_19_cast_fp16, y = attn_output_39_cast_fp16)[name = string("hidden_states_23_cast_fp16")]; tensor mean_31_axes_0 = const()[name = string("mean_31_axes_0"), val = tensor([-1])]; bool mean_31_keep_dims_0 = const()[name = string("mean_31_keep_dims_0"), val = bool(true)]; tensor mean_31_cast_fp16 = reduce_mean(axes = mean_31_axes_0, keep_dims = mean_31_keep_dims_0, x = hidden_states_23_cast_fp16)[name = string("mean_31_cast_fp16")]; tensor input_65_cast_fp16 = sub(x = hidden_states_23_cast_fp16, y = mean_31_cast_fp16)[name = string("input_65_cast_fp16")]; tensor var_3541_axes_0 = const()[name = string("op_3541_axes_0"), val = tensor([-1])]; tensor model_model_layers_3_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_3_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1023464064)))]; fp16 var_3529_to_fp16 = const()[name = string("op_3529_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_3541_cast_fp16 = layer_norm(axes = var_3541_axes_0, epsilon = var_3529_to_fp16, gamma = model_model_layers_3_post_attention_layernorm_weight_to_fp16, x = input_65_cast_fp16)[name = string("op_3541_cast_fp16")]; tensor var_3555 = const()[name = string("op_3555"), val = tensor([0, 2, 1])]; tensor input_67_axes_0 = const()[name = string("input_67_axes_0"), val = tensor([2])]; tensor var_3556 = transpose(perm = var_3555, x = var_3541_cast_fp16)[name = string("transpose_217")]; tensor input_67 = expand_dims(axes = input_67_axes_0, x = var_3556)[name = string("input_67")]; string input_69_pad_type_0 = const()[name = string("input_69_pad_type_0"), val = string("valid")]; tensor input_69_strides_0 = const()[name = string("input_69_strides_0"), val = tensor([1, 1])]; tensor input_69_pad_0 = const()[name = string("input_69_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_69_dilations_0 = const()[name = string("input_69_dilations_0"), val = tensor([1, 1])]; int32 input_69_groups_0 = const()[name = string("input_69_groups_0"), val = int32(1)]; tensor input_69 = conv(dilations = input_69_dilations_0, groups = input_69_groups_0, pad = input_69_pad_0, pad_type = input_69_pad_type_0, strides = input_69_strides_0, weight = model_model_layers_3_mlp_gate_proj_weight_palettized, x = input_67)[name = string("input_69")]; string b_7_pad_type_0 = const()[name = string("b_7_pad_type_0"), val = string("valid")]; tensor b_7_strides_0 = const()[name = string("b_7_strides_0"), val = tensor([1, 1])]; tensor b_7_pad_0 = const()[name = string("b_7_pad_0"), val = tensor([0, 0, 0, 0])]; tensor b_7_dilations_0 = const()[name = string("b_7_dilations_0"), val = tensor([1, 1])]; int32 b_7_groups_0 = const()[name = string("b_7_groups_0"), val = int32(1)]; tensor b_7 = conv(dilations = b_7_dilations_0, groups = b_7_groups_0, pad = b_7_pad_0, pad_type = b_7_pad_type_0, strides = b_7_strides_0, weight = model_model_layers_3_mlp_up_proj_weight_palettized, x = input_67)[name = string("b_7")]; tensor c_7 = silu(x = input_69)[name = string("c_7")]; tensor input_71 = mul(x = c_7, y = b_7)[name = string("input_71")]; string e_7_pad_type_0 = const()[name = string("e_7_pad_type_0"), val = string("valid")]; tensor e_7_strides_0 = const()[name = string("e_7_strides_0"), val = tensor([1, 1])]; tensor e_7_pad_0 = const()[name = string("e_7_pad_0"), val = tensor([0, 0, 0, 0])]; tensor e_7_dilations_0 = const()[name = string("e_7_dilations_0"), val = tensor([1, 1])]; int32 e_7_groups_0 = const()[name = string("e_7_groups_0"), val = int32(1)]; tensor e_7 = conv(dilations = e_7_dilations_0, groups = e_7_groups_0, pad = e_7_pad_0, pad_type = e_7_pad_type_0, strides = e_7_strides_0, weight = model_model_layers_3_mlp_down_proj_weight_palettized, x = input_71)[name = string("e_7")]; tensor var_3578_axes_0 = const()[name = string("op_3578_axes_0"), val = tensor([2])]; tensor var_3578 = squeeze(axes = var_3578_axes_0, x = e_7)[name = string("op_3578")]; tensor var_3579 = const()[name = string("op_3579"), val = tensor([0, 2, 1])]; tensor var_3580 = transpose(perm = var_3579, x = var_3578)[name = string("transpose_216")]; tensor hidden_states_25_cast_fp16 = add(x = hidden_states_23_cast_fp16, y = var_3580)[name = string("hidden_states_25_cast_fp16")]; tensor mean_33_axes_0 = const()[name = string("mean_33_axes_0"), val = tensor([-1])]; bool mean_33_keep_dims_0 = const()[name = string("mean_33_keep_dims_0"), val = bool(true)]; tensor mean_33_cast_fp16 = reduce_mean(axes = mean_33_axes_0, keep_dims = mean_33_keep_dims_0, x = hidden_states_25_cast_fp16)[name = string("mean_33_cast_fp16")]; tensor input_73_cast_fp16 = sub(x = hidden_states_25_cast_fp16, y = mean_33_cast_fp16)[name = string("input_73_cast_fp16")]; tensor var_3598_axes_0 = const()[name = string("op_3598_axes_0"), val = tensor([-1])]; tensor model_model_layers_4_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_4_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1023468224)))]; fp16 var_3586_to_fp16 = const()[name = string("op_3586_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_3598_cast_fp16 = layer_norm(axes = var_3598_axes_0, epsilon = var_3586_to_fp16, gamma = model_model_layers_4_input_layernorm_weight_to_fp16, x = input_73_cast_fp16)[name = string("op_3598_cast_fp16")]; tensor var_3610 = const()[name = string("op_3610"), val = tensor([0, 2, 1])]; tensor var_3613_axes_0 = const()[name = string("op_3613_axes_0"), val = tensor([2])]; tensor var_3611 = transpose(perm = var_3610, x = var_3598_cast_fp16)[name = string("transpose_215")]; tensor var_3613 = expand_dims(axes = var_3613_axes_0, x = var_3611)[name = string("op_3613")]; string query_states_33_pad_type_0 = const()[name = string("query_states_33_pad_type_0"), val = string("valid")]; tensor query_states_33_strides_0 = const()[name = string("query_states_33_strides_0"), val = tensor([1, 1])]; tensor query_states_33_pad_0 = const()[name = string("query_states_33_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_33_dilations_0 = const()[name = string("query_states_33_dilations_0"), val = tensor([1, 1])]; int32 query_states_33_groups_0 = const()[name = string("query_states_33_groups_0"), val = int32(1)]; tensor query_states_33 = conv(dilations = query_states_33_dilations_0, groups = query_states_33_groups_0, pad = query_states_33_pad_0, pad_type = query_states_33_pad_type_0, strides = query_states_33_strides_0, weight = model_model_layers_4_self_attn_q_proj_weight_palettized, x = var_3613)[name = string("query_states_33")]; string key_states_41_pad_type_0 = const()[name = string("key_states_41_pad_type_0"), val = string("valid")]; tensor key_states_41_strides_0 = const()[name = string("key_states_41_strides_0"), val = tensor([1, 1])]; tensor key_states_41_pad_0 = const()[name = string("key_states_41_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_41_dilations_0 = const()[name = string("key_states_41_dilations_0"), val = tensor([1, 1])]; int32 key_states_41_groups_0 = const()[name = string("key_states_41_groups_0"), val = int32(1)]; tensor key_states_41 = conv(dilations = key_states_41_dilations_0, groups = key_states_41_groups_0, pad = key_states_41_pad_0, pad_type = key_states_41_pad_type_0, strides = key_states_41_strides_0, weight = model_model_layers_4_self_attn_k_proj_weight_palettized, x = var_3613)[name = string("key_states_41")]; string value_states_33_pad_type_0 = const()[name = string("value_states_33_pad_type_0"), val = string("valid")]; tensor value_states_33_strides_0 = const()[name = string("value_states_33_strides_0"), val = tensor([1, 1])]; tensor value_states_33_pad_0 = const()[name = string("value_states_33_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_33_dilations_0 = const()[name = string("value_states_33_dilations_0"), val = tensor([1, 1])]; int32 value_states_33_groups_0 = const()[name = string("value_states_33_groups_0"), val = int32(1)]; tensor value_states_33 = conv(dilations = value_states_33_dilations_0, groups = value_states_33_groups_0, pad = value_states_33_pad_0, pad_type = value_states_33_pad_type_0, strides = value_states_33_strides_0, weight = model_model_layers_4_self_attn_v_proj_weight_palettized, x = var_3613)[name = string("value_states_33")]; tensor var_3655 = const()[name = string("op_3655"), val = tensor([1, 16, 128, 64])]; tensor var_3656 = reshape(shape = var_3655, x = query_states_33)[name = string("op_3656")]; tensor var_3661 = const()[name = string("op_3661"), val = tensor([0, 1, 3, 2])]; tensor var_3666 = const()[name = string("op_3666"), val = tensor([1, 8, 128, 64])]; tensor var_3667 = reshape(shape = var_3666, x = key_states_41)[name = string("op_3667")]; tensor var_3672 = const()[name = string("op_3672"), val = tensor([0, 1, 3, 2])]; tensor var_3677 = const()[name = string("op_3677"), val = tensor([1, 8, 128, 64])]; tensor var_3678 = reshape(shape = var_3677, x = value_states_33)[name = string("op_3678")]; tensor var_3683 = const()[name = string("op_3683"), val = tensor([0, 1, 3, 2])]; tensor mean_35_axes_0 = const()[name = string("mean_35_axes_0"), val = tensor([-1])]; bool mean_35_keep_dims_0 = const()[name = string("mean_35_keep_dims_0"), val = bool(true)]; tensor x_81 = transpose(perm = var_3661, x = var_3656)[name = string("transpose_214")]; tensor mean_35 = reduce_mean(axes = mean_35_axes_0, keep_dims = mean_35_keep_dims_0, x = x_81)[name = string("mean_35")]; tensor input_77 = sub(x = x_81, y = mean_35)[name = string("input_77")]; tensor var_3700_axes_0 = const()[name = string("op_3700_axes_0"), val = tensor([-1])]; tensor model_model_layers_4_self_attn_q_norm_weight_to_fp16 = const()[name = string("model_model_layers_4_self_attn_q_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1023472384)))]; fp16 var_3688_to_fp16 = const()[name = string("op_3688_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_3700_cast_fp16 = layer_norm(axes = var_3700_axes_0, epsilon = var_3688_to_fp16, gamma = model_model_layers_4_self_attn_q_norm_weight_to_fp16, x = input_77)[name = string("op_3700_cast_fp16")]; tensor mean_37_axes_0 = const()[name = string("mean_37_axes_0"), val = tensor([-1])]; bool mean_37_keep_dims_0 = const()[name = string("mean_37_keep_dims_0"), val = bool(true)]; tensor x_83 = transpose(perm = var_3672, x = var_3667)[name = string("transpose_213")]; tensor mean_37 = reduce_mean(axes = mean_37_axes_0, keep_dims = mean_37_keep_dims_0, x = x_83)[name = string("mean_37")]; tensor input_79 = sub(x = x_83, y = mean_37)[name = string("input_79")]; tensor var_3718_axes_0 = const()[name = string("op_3718_axes_0"), val = tensor([-1])]; tensor model_model_layers_4_self_attn_k_norm_weight_to_fp16 = const()[name = string("model_model_layers_4_self_attn_k_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1023472704)))]; fp16 var_3706_to_fp16 = const()[name = string("op_3706_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_3718_cast_fp16 = layer_norm(axes = var_3718_axes_0, epsilon = var_3706_to_fp16, gamma = model_model_layers_4_self_attn_k_norm_weight_to_fp16, x = input_79)[name = string("op_3718_cast_fp16")]; tensor var_3733 = mul(x = var_3700_cast_fp16, y = cos_5)[name = string("op_3733")]; tensor x1_17_begin_0 = const()[name = string("x1_17_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_17_end_0 = const()[name = string("x1_17_end_0"), val = tensor([1, 16, 64, 64])]; tensor x1_17_end_mask_0 = const()[name = string("x1_17_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_17 = slice_by_index(begin = x1_17_begin_0, end = x1_17_end_0, end_mask = x1_17_end_mask_0, x = var_3700_cast_fp16)[name = string("x1_17")]; tensor x2_17_begin_0 = const()[name = string("x2_17_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_17_end_0 = const()[name = string("x2_17_end_0"), val = tensor([1, 16, 64, 128])]; tensor x2_17_end_mask_0 = const()[name = string("x2_17_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_17 = slice_by_index(begin = x2_17_begin_0, end = x2_17_end_0, end_mask = x2_17_end_mask_0, x = var_3700_cast_fp16)[name = string("x2_17")]; fp16 const_95_promoted = const()[name = string("const_95_promoted"), val = fp16(-0x1p+0)]; tensor var_3754 = mul(x = x2_17, y = const_95_promoted)[name = string("op_3754")]; int32 var_3756 = const()[name = string("op_3756"), val = int32(-1)]; bool var_3757_interleave_0 = const()[name = string("op_3757_interleave_0"), val = bool(false)]; tensor var_3757 = concat(axis = var_3756, interleave = var_3757_interleave_0, values = (var_3754, x1_17))[name = string("op_3757")]; tensor var_3758 = mul(x = var_3757, y = sin_5)[name = string("op_3758")]; tensor query_states_35 = add(x = var_3733, y = var_3758)[name = string("query_states_35")]; tensor var_3761 = mul(x = var_3718_cast_fp16, y = cos_5)[name = string("op_3761")]; tensor x1_19_begin_0 = const()[name = string("x1_19_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_19_end_0 = const()[name = string("x1_19_end_0"), val = tensor([1, 8, 64, 64])]; tensor x1_19_end_mask_0 = const()[name = string("x1_19_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_19 = slice_by_index(begin = x1_19_begin_0, end = x1_19_end_0, end_mask = x1_19_end_mask_0, x = var_3718_cast_fp16)[name = string("x1_19")]; tensor x2_19_begin_0 = const()[name = string("x2_19_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_19_end_0 = const()[name = string("x2_19_end_0"), val = tensor([1, 8, 64, 128])]; tensor x2_19_end_mask_0 = const()[name = string("x2_19_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_19 = slice_by_index(begin = x2_19_begin_0, end = x2_19_end_0, end_mask = x2_19_end_mask_0, x = var_3718_cast_fp16)[name = string("x2_19")]; fp16 const_98_promoted = const()[name = string("const_98_promoted"), val = fp16(-0x1p+0)]; tensor var_3782 = mul(x = x2_19, y = const_98_promoted)[name = string("op_3782")]; int32 var_3784 = const()[name = string("op_3784"), val = int32(-1)]; bool var_3785_interleave_0 = const()[name = string("op_3785_interleave_0"), val = bool(false)]; tensor var_3785 = concat(axis = var_3784, interleave = var_3785_interleave_0, values = (var_3782, x1_19))[name = string("op_3785")]; tensor var_3786 = mul(x = var_3785, y = sin_5)[name = string("op_3786")]; tensor key_states_43 = add(x = var_3761, y = var_3786)[name = string("key_states_43")]; tensor expand_dims_48 = const()[name = string("expand_dims_48"), val = tensor([4])]; tensor expand_dims_49 = const()[name = string("expand_dims_49"), val = tensor([0])]; tensor expand_dims_51 = const()[name = string("expand_dims_51"), val = tensor([0])]; tensor expand_dims_52 = const()[name = string("expand_dims_52"), val = tensor([5])]; int32 concat_74_axis_0 = const()[name = string("concat_74_axis_0"), val = int32(0)]; bool concat_74_interleave_0 = const()[name = string("concat_74_interleave_0"), val = bool(false)]; tensor concat_74 = concat(axis = concat_74_axis_0, interleave = concat_74_interleave_0, values = (expand_dims_48, expand_dims_49, current_pos, expand_dims_51))[name = string("concat_74")]; tensor concat_75_values1_0 = const()[name = string("concat_75_values1_0"), val = tensor([0])]; tensor concat_75_values3_0 = const()[name = string("concat_75_values3_0"), val = tensor([0])]; int32 concat_75_axis_0 = const()[name = string("concat_75_axis_0"), val = int32(0)]; bool concat_75_interleave_0 = const()[name = string("concat_75_interleave_0"), val = bool(false)]; tensor concat_75 = concat(axis = concat_75_axis_0, interleave = concat_75_interleave_0, values = (expand_dims_52, concat_75_values1_0, var_1760, concat_75_values3_0))[name = string("concat_75")]; tensor model_model_kv_cache_0_internal_tensor_assign_9_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_9_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_9_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_9_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_9_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_9_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_9_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_9_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_9_cast_fp16 = slice_update(begin = concat_74, begin_mask = model_model_kv_cache_0_internal_tensor_assign_9_begin_mask_0, end = concat_75, end_mask = model_model_kv_cache_0_internal_tensor_assign_9_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_9_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_9_stride_0, update = key_states_43, x = coreml_update_state_63)[name = string("model_model_kv_cache_0_internal_tensor_assign_9_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_9_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_64_write_state")]; tensor coreml_update_state_64 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_64")]; tensor expand_dims_54 = const()[name = string("expand_dims_54"), val = tensor([32])]; tensor expand_dims_55 = const()[name = string("expand_dims_55"), val = tensor([0])]; tensor expand_dims_57 = const()[name = string("expand_dims_57"), val = tensor([0])]; tensor expand_dims_58 = const()[name = string("expand_dims_58"), val = tensor([33])]; int32 concat_78_axis_0 = const()[name = string("concat_78_axis_0"), val = int32(0)]; bool concat_78_interleave_0 = const()[name = string("concat_78_interleave_0"), val = bool(false)]; tensor concat_78 = concat(axis = concat_78_axis_0, interleave = concat_78_interleave_0, values = (expand_dims_54, expand_dims_55, current_pos, expand_dims_57))[name = string("concat_78")]; tensor concat_79_values1_0 = const()[name = string("concat_79_values1_0"), val = tensor([0])]; tensor concat_79_values3_0 = const()[name = string("concat_79_values3_0"), val = tensor([0])]; int32 concat_79_axis_0 = const()[name = string("concat_79_axis_0"), val = int32(0)]; bool concat_79_interleave_0 = const()[name = string("concat_79_interleave_0"), val = bool(false)]; tensor concat_79 = concat(axis = concat_79_axis_0, interleave = concat_79_interleave_0, values = (expand_dims_58, concat_79_values1_0, var_1760, concat_79_values3_0))[name = string("concat_79")]; tensor model_model_kv_cache_0_internal_tensor_assign_10_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_10_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_10_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_10_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_10_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_10_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_10_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_10_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_35 = transpose(perm = var_3683, x = var_3678)[name = string("transpose_212")]; tensor model_model_kv_cache_0_internal_tensor_assign_10_cast_fp16 = slice_update(begin = concat_78, begin_mask = model_model_kv_cache_0_internal_tensor_assign_10_begin_mask_0, end = concat_79, end_mask = model_model_kv_cache_0_internal_tensor_assign_10_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_10_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_10_stride_0, update = value_states_35, x = coreml_update_state_64)[name = string("model_model_kv_cache_0_internal_tensor_assign_10_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_10_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_65_write_state")]; tensor coreml_update_state_65 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_65")]; tensor var_3857_begin_0 = const()[name = string("op_3857_begin_0"), val = tensor([4, 0, 0, 0])]; tensor var_3857_end_0 = const()[name = string("op_3857_end_0"), val = tensor([5, 8, 1024, 128])]; tensor var_3857_end_mask_0 = const()[name = string("op_3857_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_3857_cast_fp16 = slice_by_index(begin = var_3857_begin_0, end = var_3857_end_0, end_mask = var_3857_end_mask_0, x = coreml_update_state_65)[name = string("op_3857_cast_fp16")]; tensor K_layer_cache_9_axes_0 = const()[name = string("K_layer_cache_9_axes_0"), val = tensor([0])]; tensor K_layer_cache_9_cast_fp16 = squeeze(axes = K_layer_cache_9_axes_0, x = var_3857_cast_fp16)[name = string("K_layer_cache_9_cast_fp16")]; tensor var_3864_begin_0 = const()[name = string("op_3864_begin_0"), val = tensor([32, 0, 0, 0])]; tensor var_3864_end_0 = const()[name = string("op_3864_end_0"), val = tensor([33, 8, 1024, 128])]; tensor var_3864_end_mask_0 = const()[name = string("op_3864_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_3864_cast_fp16 = slice_by_index(begin = var_3864_begin_0, end = var_3864_end_0, end_mask = var_3864_end_mask_0, x = coreml_update_state_65)[name = string("op_3864_cast_fp16")]; tensor V_layer_cache_9_axes_0 = const()[name = string("V_layer_cache_9_axes_0"), val = tensor([0])]; tensor V_layer_cache_9_cast_fp16 = squeeze(axes = V_layer_cache_9_axes_0, x = var_3864_cast_fp16)[name = string("V_layer_cache_9_cast_fp16")]; tensor x_87_axes_0 = const()[name = string("x_87_axes_0"), val = tensor([1])]; tensor x_87_cast_fp16 = expand_dims(axes = x_87_axes_0, x = K_layer_cache_9_cast_fp16)[name = string("x_87_cast_fp16")]; tensor var_3893 = const()[name = string("op_3893"), val = tensor([1, 2, 1, 1])]; tensor x_89_cast_fp16 = tile(reps = var_3893, x = x_87_cast_fp16)[name = string("x_89_cast_fp16")]; tensor var_3905 = const()[name = string("op_3905"), val = tensor([1, -1, 1024, 128])]; tensor key_states_47_cast_fp16 = reshape(shape = var_3905, x = x_89_cast_fp16)[name = string("key_states_47_cast_fp16")]; tensor x_93_axes_0 = const()[name = string("x_93_axes_0"), val = tensor([1])]; tensor x_93_cast_fp16 = expand_dims(axes = x_93_axes_0, x = V_layer_cache_9_cast_fp16)[name = string("x_93_cast_fp16")]; tensor var_3913 = const()[name = string("op_3913"), val = tensor([1, 2, 1, 1])]; tensor x_95_cast_fp16 = tile(reps = var_3913, x = x_93_cast_fp16)[name = string("x_95_cast_fp16")]; bool var_3940_transpose_x_0 = const()[name = string("op_3940_transpose_x_0"), val = bool(false)]; bool var_3940_transpose_y_0 = const()[name = string("op_3940_transpose_y_0"), val = bool(true)]; tensor var_3940 = matmul(transpose_x = var_3940_transpose_x_0, transpose_y = var_3940_transpose_y_0, x = query_states_35, y = key_states_47_cast_fp16)[name = string("op_3940")]; fp16 var_3941_to_fp16 = const()[name = string("op_3941_to_fp16"), val = fp16(0x1.6ap-4)]; tensor attn_weights_17_cast_fp16 = mul(x = var_3940, y = var_3941_to_fp16)[name = string("attn_weights_17_cast_fp16")]; tensor attn_weights_19_cast_fp16 = add(x = attn_weights_17_cast_fp16, y = causal_mask)[name = string("attn_weights_19_cast_fp16")]; int32 var_3976 = const()[name = string("op_3976"), val = int32(-1)]; tensor var_3978_cast_fp16 = softmax(axis = var_3976, x = attn_weights_19_cast_fp16)[name = string("op_3978_cast_fp16")]; tensor concat_84 = const()[name = string("concat_84"), val = tensor([16, 64, 1024])]; tensor reshape_12_cast_fp16 = reshape(shape = concat_84, x = var_3978_cast_fp16)[name = string("reshape_12_cast_fp16")]; tensor concat_85 = const()[name = string("concat_85"), val = tensor([16, 1024, 128])]; tensor reshape_13_cast_fp16 = reshape(shape = concat_85, x = x_95_cast_fp16)[name = string("reshape_13_cast_fp16")]; bool matmul_4_transpose_x_0 = const()[name = string("matmul_4_transpose_x_0"), val = bool(false)]; bool matmul_4_transpose_y_0 = const()[name = string("matmul_4_transpose_y_0"), val = bool(false)]; tensor matmul_4_cast_fp16 = matmul(transpose_x = matmul_4_transpose_x_0, transpose_y = matmul_4_transpose_y_0, x = reshape_12_cast_fp16, y = reshape_13_cast_fp16)[name = string("matmul_4_cast_fp16")]; tensor concat_89 = const()[name = string("concat_89"), val = tensor([1, 16, 64, 128])]; tensor reshape_14_cast_fp16 = reshape(shape = concat_89, x = matmul_4_cast_fp16)[name = string("reshape_14_cast_fp16")]; tensor var_3990_perm_0 = const()[name = string("op_3990_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_4009 = const()[name = string("op_4009"), val = tensor([1, 64, 2048])]; tensor var_3990_cast_fp16 = transpose(perm = var_3990_perm_0, x = reshape_14_cast_fp16)[name = string("transpose_211")]; tensor attn_output_45_cast_fp16 = reshape(shape = var_4009, x = var_3990_cast_fp16)[name = string("attn_output_45_cast_fp16")]; tensor var_4014 = const()[name = string("op_4014"), val = tensor([0, 2, 1])]; string var_4030_pad_type_0 = const()[name = string("op_4030_pad_type_0"), val = string("valid")]; int32 var_4030_groups_0 = const()[name = string("op_4030_groups_0"), val = int32(1)]; tensor var_4030_strides_0 = const()[name = string("op_4030_strides_0"), val = tensor([1])]; tensor var_4030_pad_0 = const()[name = string("op_4030_pad_0"), val = tensor([0, 0])]; tensor var_4030_dilations_0 = const()[name = string("op_4030_dilations_0"), val = tensor([1])]; tensor squeeze_4_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1023473024))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1026618816))))[name = string("squeeze_4_cast_fp16_to_fp32_to_fp16_palettized")]; tensor var_4015_cast_fp16 = transpose(perm = var_4014, x = attn_output_45_cast_fp16)[name = string("transpose_210")]; tensor var_4030_cast_fp16 = conv(dilations = var_4030_dilations_0, groups = var_4030_groups_0, pad = var_4030_pad_0, pad_type = var_4030_pad_type_0, strides = var_4030_strides_0, weight = squeeze_4_cast_fp16_to_fp32_to_fp16_palettized, x = var_4015_cast_fp16)[name = string("op_4030_cast_fp16")]; tensor var_4034 = const()[name = string("op_4034"), val = tensor([0, 2, 1])]; tensor attn_output_49_cast_fp16 = transpose(perm = var_4034, x = var_4030_cast_fp16)[name = string("transpose_209")]; tensor hidden_states_29_cast_fp16 = add(x = hidden_states_25_cast_fp16, y = attn_output_49_cast_fp16)[name = string("hidden_states_29_cast_fp16")]; tensor mean_39_axes_0 = const()[name = string("mean_39_axes_0"), val = tensor([-1])]; bool mean_39_keep_dims_0 = const()[name = string("mean_39_keep_dims_0"), val = bool(true)]; tensor mean_39_cast_fp16 = reduce_mean(axes = mean_39_axes_0, keep_dims = mean_39_keep_dims_0, x = hidden_states_29_cast_fp16)[name = string("mean_39_cast_fp16")]; tensor input_83_cast_fp16 = sub(x = hidden_states_29_cast_fp16, y = mean_39_cast_fp16)[name = string("input_83_cast_fp16")]; tensor var_4053_axes_0 = const()[name = string("op_4053_axes_0"), val = tensor([-1])]; tensor model_model_layers_4_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_4_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1026651648)))]; fp16 var_4041_to_fp16 = const()[name = string("op_4041_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_4053_cast_fp16 = layer_norm(axes = var_4053_axes_0, epsilon = var_4041_to_fp16, gamma = model_model_layers_4_post_attention_layernorm_weight_to_fp16, x = input_83_cast_fp16)[name = string("op_4053_cast_fp16")]; tensor var_4067 = const()[name = string("op_4067"), val = tensor([0, 2, 1])]; tensor input_85_axes_0 = const()[name = string("input_85_axes_0"), val = tensor([2])]; tensor var_4068 = transpose(perm = var_4067, x = var_4053_cast_fp16)[name = string("transpose_208")]; tensor input_85 = expand_dims(axes = input_85_axes_0, x = var_4068)[name = string("input_85")]; string input_87_pad_type_0 = const()[name = string("input_87_pad_type_0"), val = string("valid")]; tensor input_87_strides_0 = const()[name = string("input_87_strides_0"), val = tensor([1, 1])]; tensor input_87_pad_0 = const()[name = string("input_87_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_87_dilations_0 = const()[name = string("input_87_dilations_0"), val = tensor([1, 1])]; int32 input_87_groups_0 = const()[name = string("input_87_groups_0"), val = int32(1)]; tensor input_87 = conv(dilations = input_87_dilations_0, groups = input_87_groups_0, pad = input_87_pad_0, pad_type = input_87_pad_type_0, strides = input_87_strides_0, weight = model_model_layers_4_mlp_gate_proj_weight_palettized, x = input_85)[name = string("input_87")]; string b_9_pad_type_0 = const()[name = string("b_9_pad_type_0"), val = string("valid")]; tensor b_9_strides_0 = const()[name = string("b_9_strides_0"), val = tensor([1, 1])]; tensor b_9_pad_0 = const()[name = string("b_9_pad_0"), val = tensor([0, 0, 0, 0])]; tensor b_9_dilations_0 = const()[name = string("b_9_dilations_0"), val = tensor([1, 1])]; int32 b_9_groups_0 = const()[name = string("b_9_groups_0"), val = int32(1)]; tensor b_9 = conv(dilations = b_9_dilations_0, groups = b_9_groups_0, pad = b_9_pad_0, pad_type = b_9_pad_type_0, strides = b_9_strides_0, weight = model_model_layers_4_mlp_up_proj_weight_palettized, x = input_85)[name = string("b_9")]; tensor c_9 = silu(x = input_87)[name = string("c_9")]; tensor input_89 = mul(x = c_9, y = b_9)[name = string("input_89")]; string e_9_pad_type_0 = const()[name = string("e_9_pad_type_0"), val = string("valid")]; tensor e_9_strides_0 = const()[name = string("e_9_strides_0"), val = tensor([1, 1])]; tensor e_9_pad_0 = const()[name = string("e_9_pad_0"), val = tensor([0, 0, 0, 0])]; tensor e_9_dilations_0 = const()[name = string("e_9_dilations_0"), val = tensor([1, 1])]; int32 e_9_groups_0 = const()[name = string("e_9_groups_0"), val = int32(1)]; tensor e_9 = conv(dilations = e_9_dilations_0, groups = e_9_groups_0, pad = e_9_pad_0, pad_type = e_9_pad_type_0, strides = e_9_strides_0, weight = model_model_layers_4_mlp_down_proj_weight_palettized, x = input_89)[name = string("e_9")]; tensor var_4090_axes_0 = const()[name = string("op_4090_axes_0"), val = tensor([2])]; tensor var_4090 = squeeze(axes = var_4090_axes_0, x = e_9)[name = string("op_4090")]; tensor var_4091 = const()[name = string("op_4091"), val = tensor([0, 2, 1])]; tensor var_4092 = transpose(perm = var_4091, x = var_4090)[name = string("transpose_207")]; tensor hidden_states_31_cast_fp16 = add(x = hidden_states_29_cast_fp16, y = var_4092)[name = string("hidden_states_31_cast_fp16")]; tensor mean_41_axes_0 = const()[name = string("mean_41_axes_0"), val = tensor([-1])]; bool mean_41_keep_dims_0 = const()[name = string("mean_41_keep_dims_0"), val = bool(true)]; tensor mean_41_cast_fp16 = reduce_mean(axes = mean_41_axes_0, keep_dims = mean_41_keep_dims_0, x = hidden_states_31_cast_fp16)[name = string("mean_41_cast_fp16")]; tensor input_91_cast_fp16 = sub(x = hidden_states_31_cast_fp16, y = mean_41_cast_fp16)[name = string("input_91_cast_fp16")]; tensor var_4110_axes_0 = const()[name = string("op_4110_axes_0"), val = tensor([-1])]; tensor model_model_layers_5_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_5_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1026655808)))]; fp16 var_4098_to_fp16 = const()[name = string("op_4098_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_4110_cast_fp16 = layer_norm(axes = var_4110_axes_0, epsilon = var_4098_to_fp16, gamma = model_model_layers_5_input_layernorm_weight_to_fp16, x = input_91_cast_fp16)[name = string("op_4110_cast_fp16")]; tensor var_4122 = const()[name = string("op_4122"), val = tensor([0, 2, 1])]; tensor var_4125_axes_0 = const()[name = string("op_4125_axes_0"), val = tensor([2])]; tensor var_4123 = transpose(perm = var_4122, x = var_4110_cast_fp16)[name = string("transpose_206")]; tensor var_4125 = expand_dims(axes = var_4125_axes_0, x = var_4123)[name = string("op_4125")]; string query_states_41_pad_type_0 = const()[name = string("query_states_41_pad_type_0"), val = string("valid")]; tensor query_states_41_strides_0 = const()[name = string("query_states_41_strides_0"), val = tensor([1, 1])]; tensor query_states_41_pad_0 = const()[name = string("query_states_41_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_41_dilations_0 = const()[name = string("query_states_41_dilations_0"), val = tensor([1, 1])]; int32 query_states_41_groups_0 = const()[name = string("query_states_41_groups_0"), val = int32(1)]; tensor query_states_41 = conv(dilations = query_states_41_dilations_0, groups = query_states_41_groups_0, pad = query_states_41_pad_0, pad_type = query_states_41_pad_type_0, strides = query_states_41_strides_0, weight = model_model_layers_5_self_attn_q_proj_weight_palettized, x = var_4125)[name = string("query_states_41")]; string key_states_51_pad_type_0 = const()[name = string("key_states_51_pad_type_0"), val = string("valid")]; tensor key_states_51_strides_0 = const()[name = string("key_states_51_strides_0"), val = tensor([1, 1])]; tensor key_states_51_pad_0 = const()[name = string("key_states_51_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_51_dilations_0 = const()[name = string("key_states_51_dilations_0"), val = tensor([1, 1])]; int32 key_states_51_groups_0 = const()[name = string("key_states_51_groups_0"), val = int32(1)]; tensor key_states_51 = conv(dilations = key_states_51_dilations_0, groups = key_states_51_groups_0, pad = key_states_51_pad_0, pad_type = key_states_51_pad_type_0, strides = key_states_51_strides_0, weight = model_model_layers_5_self_attn_k_proj_weight_palettized, x = var_4125)[name = string("key_states_51")]; string value_states_41_pad_type_0 = const()[name = string("value_states_41_pad_type_0"), val = string("valid")]; tensor value_states_41_strides_0 = const()[name = string("value_states_41_strides_0"), val = tensor([1, 1])]; tensor value_states_41_pad_0 = const()[name = string("value_states_41_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_41_dilations_0 = const()[name = string("value_states_41_dilations_0"), val = tensor([1, 1])]; int32 value_states_41_groups_0 = const()[name = string("value_states_41_groups_0"), val = int32(1)]; tensor value_states_41 = conv(dilations = value_states_41_dilations_0, groups = value_states_41_groups_0, pad = value_states_41_pad_0, pad_type = value_states_41_pad_type_0, strides = value_states_41_strides_0, weight = model_model_layers_5_self_attn_v_proj_weight_palettized, x = var_4125)[name = string("value_states_41")]; tensor var_4167 = const()[name = string("op_4167"), val = tensor([1, 16, 128, 64])]; tensor var_4168 = reshape(shape = var_4167, x = query_states_41)[name = string("op_4168")]; tensor var_4173 = const()[name = string("op_4173"), val = tensor([0, 1, 3, 2])]; tensor var_4178 = const()[name = string("op_4178"), val = tensor([1, 8, 128, 64])]; tensor var_4179 = reshape(shape = var_4178, x = key_states_51)[name = string("op_4179")]; tensor var_4184 = const()[name = string("op_4184"), val = tensor([0, 1, 3, 2])]; tensor var_4189 = const()[name = string("op_4189"), val = tensor([1, 8, 128, 64])]; tensor var_4190 = reshape(shape = var_4189, x = value_states_41)[name = string("op_4190")]; tensor var_4195 = const()[name = string("op_4195"), val = tensor([0, 1, 3, 2])]; tensor mean_43_axes_0 = const()[name = string("mean_43_axes_0"), val = tensor([-1])]; bool mean_43_keep_dims_0 = const()[name = string("mean_43_keep_dims_0"), val = bool(true)]; tensor x_101 = transpose(perm = var_4173, x = var_4168)[name = string("transpose_205")]; tensor mean_43 = reduce_mean(axes = mean_43_axes_0, keep_dims = mean_43_keep_dims_0, x = x_101)[name = string("mean_43")]; tensor input_95 = sub(x = x_101, y = mean_43)[name = string("input_95")]; tensor var_4212_axes_0 = const()[name = string("op_4212_axes_0"), val = tensor([-1])]; tensor model_model_layers_5_self_attn_q_norm_weight_to_fp16 = const()[name = string("model_model_layers_5_self_attn_q_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1026659968)))]; fp16 var_4200_to_fp16 = const()[name = string("op_4200_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_4212_cast_fp16 = layer_norm(axes = var_4212_axes_0, epsilon = var_4200_to_fp16, gamma = model_model_layers_5_self_attn_q_norm_weight_to_fp16, x = input_95)[name = string("op_4212_cast_fp16")]; tensor mean_45_axes_0 = const()[name = string("mean_45_axes_0"), val = tensor([-1])]; bool mean_45_keep_dims_0 = const()[name = string("mean_45_keep_dims_0"), val = bool(true)]; tensor x_103 = transpose(perm = var_4184, x = var_4179)[name = string("transpose_204")]; tensor mean_45 = reduce_mean(axes = mean_45_axes_0, keep_dims = mean_45_keep_dims_0, x = x_103)[name = string("mean_45")]; tensor input_97 = sub(x = x_103, y = mean_45)[name = string("input_97")]; tensor var_4230_axes_0 = const()[name = string("op_4230_axes_0"), val = tensor([-1])]; tensor model_model_layers_5_self_attn_k_norm_weight_to_fp16 = const()[name = string("model_model_layers_5_self_attn_k_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1026660288)))]; fp16 var_4218_to_fp16 = const()[name = string("op_4218_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_4230_cast_fp16 = layer_norm(axes = var_4230_axes_0, epsilon = var_4218_to_fp16, gamma = model_model_layers_5_self_attn_k_norm_weight_to_fp16, x = input_97)[name = string("op_4230_cast_fp16")]; tensor var_4245 = mul(x = var_4212_cast_fp16, y = cos_5)[name = string("op_4245")]; tensor x1_21_begin_0 = const()[name = string("x1_21_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_21_end_0 = const()[name = string("x1_21_end_0"), val = tensor([1, 16, 64, 64])]; tensor x1_21_end_mask_0 = const()[name = string("x1_21_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_21 = slice_by_index(begin = x1_21_begin_0, end = x1_21_end_0, end_mask = x1_21_end_mask_0, x = var_4212_cast_fp16)[name = string("x1_21")]; tensor x2_21_begin_0 = const()[name = string("x2_21_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_21_end_0 = const()[name = string("x2_21_end_0"), val = tensor([1, 16, 64, 128])]; tensor x2_21_end_mask_0 = const()[name = string("x2_21_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_21 = slice_by_index(begin = x2_21_begin_0, end = x2_21_end_0, end_mask = x2_21_end_mask_0, x = var_4212_cast_fp16)[name = string("x2_21")]; fp16 const_117_promoted = const()[name = string("const_117_promoted"), val = fp16(-0x1p+0)]; tensor var_4266 = mul(x = x2_21, y = const_117_promoted)[name = string("op_4266")]; int32 var_4268 = const()[name = string("op_4268"), val = int32(-1)]; bool var_4269_interleave_0 = const()[name = string("op_4269_interleave_0"), val = bool(false)]; tensor var_4269 = concat(axis = var_4268, interleave = var_4269_interleave_0, values = (var_4266, x1_21))[name = string("op_4269")]; tensor var_4270 = mul(x = var_4269, y = sin_5)[name = string("op_4270")]; tensor query_states_43 = add(x = var_4245, y = var_4270)[name = string("query_states_43")]; tensor var_4273 = mul(x = var_4230_cast_fp16, y = cos_5)[name = string("op_4273")]; tensor x1_23_begin_0 = const()[name = string("x1_23_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_23_end_0 = const()[name = string("x1_23_end_0"), val = tensor([1, 8, 64, 64])]; tensor x1_23_end_mask_0 = const()[name = string("x1_23_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_23 = slice_by_index(begin = x1_23_begin_0, end = x1_23_end_0, end_mask = x1_23_end_mask_0, x = var_4230_cast_fp16)[name = string("x1_23")]; tensor x2_23_begin_0 = const()[name = string("x2_23_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_23_end_0 = const()[name = string("x2_23_end_0"), val = tensor([1, 8, 64, 128])]; tensor x2_23_end_mask_0 = const()[name = string("x2_23_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_23 = slice_by_index(begin = x2_23_begin_0, end = x2_23_end_0, end_mask = x2_23_end_mask_0, x = var_4230_cast_fp16)[name = string("x2_23")]; fp16 const_120_promoted = const()[name = string("const_120_promoted"), val = fp16(-0x1p+0)]; tensor var_4294 = mul(x = x2_23, y = const_120_promoted)[name = string("op_4294")]; int32 var_4296 = const()[name = string("op_4296"), val = int32(-1)]; bool var_4297_interleave_0 = const()[name = string("op_4297_interleave_0"), val = bool(false)]; tensor var_4297 = concat(axis = var_4296, interleave = var_4297_interleave_0, values = (var_4294, x1_23))[name = string("op_4297")]; tensor var_4298 = mul(x = var_4297, y = sin_5)[name = string("op_4298")]; tensor key_states_53 = add(x = var_4273, y = var_4298)[name = string("key_states_53")]; tensor expand_dims_60 = const()[name = string("expand_dims_60"), val = tensor([5])]; tensor expand_dims_61 = const()[name = string("expand_dims_61"), val = tensor([0])]; tensor expand_dims_63 = const()[name = string("expand_dims_63"), val = tensor([0])]; tensor expand_dims_64 = const()[name = string("expand_dims_64"), val = tensor([6])]; int32 concat_92_axis_0 = const()[name = string("concat_92_axis_0"), val = int32(0)]; bool concat_92_interleave_0 = const()[name = string("concat_92_interleave_0"), val = bool(false)]; tensor concat_92 = concat(axis = concat_92_axis_0, interleave = concat_92_interleave_0, values = (expand_dims_60, expand_dims_61, current_pos, expand_dims_63))[name = string("concat_92")]; tensor concat_93_values1_0 = const()[name = string("concat_93_values1_0"), val = tensor([0])]; tensor concat_93_values3_0 = const()[name = string("concat_93_values3_0"), val = tensor([0])]; int32 concat_93_axis_0 = const()[name = string("concat_93_axis_0"), val = int32(0)]; bool concat_93_interleave_0 = const()[name = string("concat_93_interleave_0"), val = bool(false)]; tensor concat_93 = concat(axis = concat_93_axis_0, interleave = concat_93_interleave_0, values = (expand_dims_64, concat_93_values1_0, var_1760, concat_93_values3_0))[name = string("concat_93")]; tensor model_model_kv_cache_0_internal_tensor_assign_11_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_11_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_11_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_11_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_11_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_11_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_11_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_11_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_11_cast_fp16 = slice_update(begin = concat_92, begin_mask = model_model_kv_cache_0_internal_tensor_assign_11_begin_mask_0, end = concat_93, end_mask = model_model_kv_cache_0_internal_tensor_assign_11_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_11_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_11_stride_0, update = key_states_53, x = coreml_update_state_65)[name = string("model_model_kv_cache_0_internal_tensor_assign_11_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_11_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_66_write_state")]; tensor coreml_update_state_66 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_66")]; tensor expand_dims_66 = const()[name = string("expand_dims_66"), val = tensor([33])]; tensor expand_dims_67 = const()[name = string("expand_dims_67"), val = tensor([0])]; tensor expand_dims_69 = const()[name = string("expand_dims_69"), val = tensor([0])]; tensor expand_dims_70 = const()[name = string("expand_dims_70"), val = tensor([34])]; int32 concat_96_axis_0 = const()[name = string("concat_96_axis_0"), val = int32(0)]; bool concat_96_interleave_0 = const()[name = string("concat_96_interleave_0"), val = bool(false)]; tensor concat_96 = concat(axis = concat_96_axis_0, interleave = concat_96_interleave_0, values = (expand_dims_66, expand_dims_67, current_pos, expand_dims_69))[name = string("concat_96")]; tensor concat_97_values1_0 = const()[name = string("concat_97_values1_0"), val = tensor([0])]; tensor concat_97_values3_0 = const()[name = string("concat_97_values3_0"), val = tensor([0])]; int32 concat_97_axis_0 = const()[name = string("concat_97_axis_0"), val = int32(0)]; bool concat_97_interleave_0 = const()[name = string("concat_97_interleave_0"), val = bool(false)]; tensor concat_97 = concat(axis = concat_97_axis_0, interleave = concat_97_interleave_0, values = (expand_dims_70, concat_97_values1_0, var_1760, concat_97_values3_0))[name = string("concat_97")]; tensor model_model_kv_cache_0_internal_tensor_assign_12_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_12_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_12_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_12_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_12_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_12_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_12_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_12_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_43 = transpose(perm = var_4195, x = var_4190)[name = string("transpose_203")]; tensor model_model_kv_cache_0_internal_tensor_assign_12_cast_fp16 = slice_update(begin = concat_96, begin_mask = model_model_kv_cache_0_internal_tensor_assign_12_begin_mask_0, end = concat_97, end_mask = model_model_kv_cache_0_internal_tensor_assign_12_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_12_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_12_stride_0, update = value_states_43, x = coreml_update_state_66)[name = string("model_model_kv_cache_0_internal_tensor_assign_12_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_12_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_67_write_state")]; tensor coreml_update_state_67 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_67")]; tensor var_4369_begin_0 = const()[name = string("op_4369_begin_0"), val = tensor([5, 0, 0, 0])]; tensor var_4369_end_0 = const()[name = string("op_4369_end_0"), val = tensor([6, 8, 1024, 128])]; tensor var_4369_end_mask_0 = const()[name = string("op_4369_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_4369_cast_fp16 = slice_by_index(begin = var_4369_begin_0, end = var_4369_end_0, end_mask = var_4369_end_mask_0, x = coreml_update_state_67)[name = string("op_4369_cast_fp16")]; tensor K_layer_cache_11_axes_0 = const()[name = string("K_layer_cache_11_axes_0"), val = tensor([0])]; tensor K_layer_cache_11_cast_fp16 = squeeze(axes = K_layer_cache_11_axes_0, x = var_4369_cast_fp16)[name = string("K_layer_cache_11_cast_fp16")]; tensor var_4376_begin_0 = const()[name = string("op_4376_begin_0"), val = tensor([33, 0, 0, 0])]; tensor var_4376_end_0 = const()[name = string("op_4376_end_0"), val = tensor([34, 8, 1024, 128])]; tensor var_4376_end_mask_0 = const()[name = string("op_4376_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_4376_cast_fp16 = slice_by_index(begin = var_4376_begin_0, end = var_4376_end_0, end_mask = var_4376_end_mask_0, x = coreml_update_state_67)[name = string("op_4376_cast_fp16")]; tensor V_layer_cache_11_axes_0 = const()[name = string("V_layer_cache_11_axes_0"), val = tensor([0])]; tensor V_layer_cache_11_cast_fp16 = squeeze(axes = V_layer_cache_11_axes_0, x = var_4376_cast_fp16)[name = string("V_layer_cache_11_cast_fp16")]; tensor x_107_axes_0 = const()[name = string("x_107_axes_0"), val = tensor([1])]; tensor x_107_cast_fp16 = expand_dims(axes = x_107_axes_0, x = K_layer_cache_11_cast_fp16)[name = string("x_107_cast_fp16")]; tensor var_4405 = const()[name = string("op_4405"), val = tensor([1, 2, 1, 1])]; tensor x_109_cast_fp16 = tile(reps = var_4405, x = x_107_cast_fp16)[name = string("x_109_cast_fp16")]; tensor var_4417 = const()[name = string("op_4417"), val = tensor([1, -1, 1024, 128])]; tensor key_states_57_cast_fp16 = reshape(shape = var_4417, x = x_109_cast_fp16)[name = string("key_states_57_cast_fp16")]; tensor x_113_axes_0 = const()[name = string("x_113_axes_0"), val = tensor([1])]; tensor x_113_cast_fp16 = expand_dims(axes = x_113_axes_0, x = V_layer_cache_11_cast_fp16)[name = string("x_113_cast_fp16")]; tensor var_4425 = const()[name = string("op_4425"), val = tensor([1, 2, 1, 1])]; tensor x_115_cast_fp16 = tile(reps = var_4425, x = x_113_cast_fp16)[name = string("x_115_cast_fp16")]; bool var_4452_transpose_x_0 = const()[name = string("op_4452_transpose_x_0"), val = bool(false)]; bool var_4452_transpose_y_0 = const()[name = string("op_4452_transpose_y_0"), val = bool(true)]; tensor var_4452 = matmul(transpose_x = var_4452_transpose_x_0, transpose_y = var_4452_transpose_y_0, x = query_states_43, y = key_states_57_cast_fp16)[name = string("op_4452")]; fp16 var_4453_to_fp16 = const()[name = string("op_4453_to_fp16"), val = fp16(0x1.6ap-4)]; tensor attn_weights_21_cast_fp16 = mul(x = var_4452, y = var_4453_to_fp16)[name = string("attn_weights_21_cast_fp16")]; tensor attn_weights_23_cast_fp16 = add(x = attn_weights_21_cast_fp16, y = causal_mask)[name = string("attn_weights_23_cast_fp16")]; int32 var_4488 = const()[name = string("op_4488"), val = int32(-1)]; tensor var_4490_cast_fp16 = softmax(axis = var_4488, x = attn_weights_23_cast_fp16)[name = string("op_4490_cast_fp16")]; tensor concat_102 = const()[name = string("concat_102"), val = tensor([16, 64, 1024])]; tensor reshape_15_cast_fp16 = reshape(shape = concat_102, x = var_4490_cast_fp16)[name = string("reshape_15_cast_fp16")]; tensor concat_103 = const()[name = string("concat_103"), val = tensor([16, 1024, 128])]; tensor reshape_16_cast_fp16 = reshape(shape = concat_103, x = x_115_cast_fp16)[name = string("reshape_16_cast_fp16")]; bool matmul_5_transpose_x_0 = const()[name = string("matmul_5_transpose_x_0"), val = bool(false)]; bool matmul_5_transpose_y_0 = const()[name = string("matmul_5_transpose_y_0"), val = bool(false)]; tensor matmul_5_cast_fp16 = matmul(transpose_x = matmul_5_transpose_x_0, transpose_y = matmul_5_transpose_y_0, x = reshape_15_cast_fp16, y = reshape_16_cast_fp16)[name = string("matmul_5_cast_fp16")]; tensor concat_107 = const()[name = string("concat_107"), val = tensor([1, 16, 64, 128])]; tensor reshape_17_cast_fp16 = reshape(shape = concat_107, x = matmul_5_cast_fp16)[name = string("reshape_17_cast_fp16")]; tensor var_4502_perm_0 = const()[name = string("op_4502_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_4521 = const()[name = string("op_4521"), val = tensor([1, 64, 2048])]; tensor var_4502_cast_fp16 = transpose(perm = var_4502_perm_0, x = reshape_17_cast_fp16)[name = string("transpose_202")]; tensor attn_output_55_cast_fp16 = reshape(shape = var_4521, x = var_4502_cast_fp16)[name = string("attn_output_55_cast_fp16")]; tensor var_4526 = const()[name = string("op_4526"), val = tensor([0, 2, 1])]; string var_4542_pad_type_0 = const()[name = string("op_4542_pad_type_0"), val = string("valid")]; int32 var_4542_groups_0 = const()[name = string("op_4542_groups_0"), val = int32(1)]; tensor var_4542_strides_0 = const()[name = string("op_4542_strides_0"), val = tensor([1])]; tensor var_4542_pad_0 = const()[name = string("op_4542_pad_0"), val = tensor([0, 0])]; tensor var_4542_dilations_0 = const()[name = string("op_4542_dilations_0"), val = tensor([1])]; tensor squeeze_5_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1026660608))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1029806400))))[name = string("squeeze_5_cast_fp16_to_fp32_to_fp16_palettized")]; tensor var_4527_cast_fp16 = transpose(perm = var_4526, x = attn_output_55_cast_fp16)[name = string("transpose_201")]; tensor var_4542_cast_fp16 = conv(dilations = var_4542_dilations_0, groups = var_4542_groups_0, pad = var_4542_pad_0, pad_type = var_4542_pad_type_0, strides = var_4542_strides_0, weight = squeeze_5_cast_fp16_to_fp32_to_fp16_palettized, x = var_4527_cast_fp16)[name = string("op_4542_cast_fp16")]; tensor var_4546 = const()[name = string("op_4546"), val = tensor([0, 2, 1])]; tensor attn_output_59_cast_fp16 = transpose(perm = var_4546, x = var_4542_cast_fp16)[name = string("transpose_200")]; tensor hidden_states_35_cast_fp16 = add(x = hidden_states_31_cast_fp16, y = attn_output_59_cast_fp16)[name = string("hidden_states_35_cast_fp16")]; tensor mean_47_axes_0 = const()[name = string("mean_47_axes_0"), val = tensor([-1])]; bool mean_47_keep_dims_0 = const()[name = string("mean_47_keep_dims_0"), val = bool(true)]; tensor mean_47_cast_fp16 = reduce_mean(axes = mean_47_axes_0, keep_dims = mean_47_keep_dims_0, x = hidden_states_35_cast_fp16)[name = string("mean_47_cast_fp16")]; tensor input_101_cast_fp16 = sub(x = hidden_states_35_cast_fp16, y = mean_47_cast_fp16)[name = string("input_101_cast_fp16")]; tensor var_4565_axes_0 = const()[name = string("op_4565_axes_0"), val = tensor([-1])]; tensor model_model_layers_5_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_5_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1029839232)))]; fp16 var_4553_to_fp16 = const()[name = string("op_4553_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_4565_cast_fp16 = layer_norm(axes = var_4565_axes_0, epsilon = var_4553_to_fp16, gamma = model_model_layers_5_post_attention_layernorm_weight_to_fp16, x = input_101_cast_fp16)[name = string("op_4565_cast_fp16")]; tensor var_4579 = const()[name = string("op_4579"), val = tensor([0, 2, 1])]; tensor input_103_axes_0 = const()[name = string("input_103_axes_0"), val = tensor([2])]; tensor var_4580 = transpose(perm = var_4579, x = var_4565_cast_fp16)[name = string("transpose_199")]; tensor input_103 = expand_dims(axes = input_103_axes_0, x = var_4580)[name = string("input_103")]; string input_105_pad_type_0 = const()[name = string("input_105_pad_type_0"), val = string("valid")]; tensor input_105_strides_0 = const()[name = string("input_105_strides_0"), val = tensor([1, 1])]; tensor input_105_pad_0 = const()[name = string("input_105_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_105_dilations_0 = const()[name = string("input_105_dilations_0"), val = tensor([1, 1])]; int32 input_105_groups_0 = const()[name = string("input_105_groups_0"), val = int32(1)]; tensor input_105 = conv(dilations = input_105_dilations_0, groups = input_105_groups_0, pad = input_105_pad_0, pad_type = input_105_pad_type_0, strides = input_105_strides_0, weight = model_model_layers_5_mlp_gate_proj_weight_palettized, x = input_103)[name = string("input_105")]; string b_11_pad_type_0 = const()[name = string("b_11_pad_type_0"), val = string("valid")]; tensor b_11_strides_0 = const()[name = string("b_11_strides_0"), val = tensor([1, 1])]; tensor b_11_pad_0 = const()[name = string("b_11_pad_0"), val = tensor([0, 0, 0, 0])]; tensor b_11_dilations_0 = const()[name = string("b_11_dilations_0"), val = tensor([1, 1])]; int32 b_11_groups_0 = const()[name = string("b_11_groups_0"), val = int32(1)]; tensor b_11 = conv(dilations = b_11_dilations_0, groups = b_11_groups_0, pad = b_11_pad_0, pad_type = b_11_pad_type_0, strides = b_11_strides_0, weight = model_model_layers_5_mlp_up_proj_weight_palettized, x = input_103)[name = string("b_11")]; tensor c_11 = silu(x = input_105)[name = string("c_11")]; tensor input_107 = mul(x = c_11, y = b_11)[name = string("input_107")]; string e_11_pad_type_0 = const()[name = string("e_11_pad_type_0"), val = string("valid")]; tensor e_11_strides_0 = const()[name = string("e_11_strides_0"), val = tensor([1, 1])]; tensor e_11_pad_0 = const()[name = string("e_11_pad_0"), val = tensor([0, 0, 0, 0])]; tensor e_11_dilations_0 = const()[name = string("e_11_dilations_0"), val = tensor([1, 1])]; int32 e_11_groups_0 = const()[name = string("e_11_groups_0"), val = int32(1)]; tensor e_11 = conv(dilations = e_11_dilations_0, groups = e_11_groups_0, pad = e_11_pad_0, pad_type = e_11_pad_type_0, strides = e_11_strides_0, weight = model_model_layers_5_mlp_down_proj_weight_palettized, x = input_107)[name = string("e_11")]; tensor var_4602_axes_0 = const()[name = string("op_4602_axes_0"), val = tensor([2])]; tensor var_4602 = squeeze(axes = var_4602_axes_0, x = e_11)[name = string("op_4602")]; tensor var_4603 = const()[name = string("op_4603"), val = tensor([0, 2, 1])]; tensor var_4604 = transpose(perm = var_4603, x = var_4602)[name = string("transpose_198")]; tensor hidden_states_37_cast_fp16 = add(x = hidden_states_35_cast_fp16, y = var_4604)[name = string("hidden_states_37_cast_fp16")]; tensor mean_49_axes_0 = const()[name = string("mean_49_axes_0"), val = tensor([-1])]; bool mean_49_keep_dims_0 = const()[name = string("mean_49_keep_dims_0"), val = bool(true)]; tensor mean_49_cast_fp16 = reduce_mean(axes = mean_49_axes_0, keep_dims = mean_49_keep_dims_0, x = hidden_states_37_cast_fp16)[name = string("mean_49_cast_fp16")]; tensor input_109_cast_fp16 = sub(x = hidden_states_37_cast_fp16, y = mean_49_cast_fp16)[name = string("input_109_cast_fp16")]; tensor var_4622_axes_0 = const()[name = string("op_4622_axes_0"), val = tensor([-1])]; tensor model_model_layers_6_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_6_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1029843392)))]; fp16 var_4610_to_fp16 = const()[name = string("op_4610_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_4622_cast_fp16 = layer_norm(axes = var_4622_axes_0, epsilon = var_4610_to_fp16, gamma = model_model_layers_6_input_layernorm_weight_to_fp16, x = input_109_cast_fp16)[name = string("op_4622_cast_fp16")]; tensor var_4634 = const()[name = string("op_4634"), val = tensor([0, 2, 1])]; tensor var_4637_axes_0 = const()[name = string("op_4637_axes_0"), val = tensor([2])]; tensor var_4635 = transpose(perm = var_4634, x = var_4622_cast_fp16)[name = string("transpose_197")]; tensor var_4637 = expand_dims(axes = var_4637_axes_0, x = var_4635)[name = string("op_4637")]; string query_states_49_pad_type_0 = const()[name = string("query_states_49_pad_type_0"), val = string("valid")]; tensor query_states_49_strides_0 = const()[name = string("query_states_49_strides_0"), val = tensor([1, 1])]; tensor query_states_49_pad_0 = const()[name = string("query_states_49_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_49_dilations_0 = const()[name = string("query_states_49_dilations_0"), val = tensor([1, 1])]; int32 query_states_49_groups_0 = const()[name = string("query_states_49_groups_0"), val = int32(1)]; tensor query_states_49 = conv(dilations = query_states_49_dilations_0, groups = query_states_49_groups_0, pad = query_states_49_pad_0, pad_type = query_states_49_pad_type_0, strides = query_states_49_strides_0, weight = model_model_layers_6_self_attn_q_proj_weight_palettized, x = var_4637)[name = string("query_states_49")]; string key_states_61_pad_type_0 = const()[name = string("key_states_61_pad_type_0"), val = string("valid")]; tensor key_states_61_strides_0 = const()[name = string("key_states_61_strides_0"), val = tensor([1, 1])]; tensor key_states_61_pad_0 = const()[name = string("key_states_61_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_61_dilations_0 = const()[name = string("key_states_61_dilations_0"), val = tensor([1, 1])]; int32 key_states_61_groups_0 = const()[name = string("key_states_61_groups_0"), val = int32(1)]; tensor key_states_61 = conv(dilations = key_states_61_dilations_0, groups = key_states_61_groups_0, pad = key_states_61_pad_0, pad_type = key_states_61_pad_type_0, strides = key_states_61_strides_0, weight = model_model_layers_6_self_attn_k_proj_weight_palettized, x = var_4637)[name = string("key_states_61")]; string value_states_49_pad_type_0 = const()[name = string("value_states_49_pad_type_0"), val = string("valid")]; tensor value_states_49_strides_0 = const()[name = string("value_states_49_strides_0"), val = tensor([1, 1])]; tensor value_states_49_pad_0 = const()[name = string("value_states_49_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_49_dilations_0 = const()[name = string("value_states_49_dilations_0"), val = tensor([1, 1])]; int32 value_states_49_groups_0 = const()[name = string("value_states_49_groups_0"), val = int32(1)]; tensor value_states_49 = conv(dilations = value_states_49_dilations_0, groups = value_states_49_groups_0, pad = value_states_49_pad_0, pad_type = value_states_49_pad_type_0, strides = value_states_49_strides_0, weight = model_model_layers_6_self_attn_v_proj_weight_palettized, x = var_4637)[name = string("value_states_49")]; tensor var_4679 = const()[name = string("op_4679"), val = tensor([1, 16, 128, 64])]; tensor var_4680 = reshape(shape = var_4679, x = query_states_49)[name = string("op_4680")]; tensor var_4685 = const()[name = string("op_4685"), val = tensor([0, 1, 3, 2])]; tensor var_4690 = const()[name = string("op_4690"), val = tensor([1, 8, 128, 64])]; tensor var_4691 = reshape(shape = var_4690, x = key_states_61)[name = string("op_4691")]; tensor var_4696 = const()[name = string("op_4696"), val = tensor([0, 1, 3, 2])]; tensor var_4701 = const()[name = string("op_4701"), val = tensor([1, 8, 128, 64])]; tensor var_4702 = reshape(shape = var_4701, x = value_states_49)[name = string("op_4702")]; tensor var_4707 = const()[name = string("op_4707"), val = tensor([0, 1, 3, 2])]; tensor mean_51_axes_0 = const()[name = string("mean_51_axes_0"), val = tensor([-1])]; bool mean_51_keep_dims_0 = const()[name = string("mean_51_keep_dims_0"), val = bool(true)]; tensor x_121 = transpose(perm = var_4685, x = var_4680)[name = string("transpose_196")]; tensor mean_51 = reduce_mean(axes = mean_51_axes_0, keep_dims = mean_51_keep_dims_0, x = x_121)[name = string("mean_51")]; tensor input_113 = sub(x = x_121, y = mean_51)[name = string("input_113")]; tensor var_4724_axes_0 = const()[name = string("op_4724_axes_0"), val = tensor([-1])]; tensor model_model_layers_6_self_attn_q_norm_weight_to_fp16 = const()[name = string("model_model_layers_6_self_attn_q_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1029847552)))]; fp16 var_4712_to_fp16 = const()[name = string("op_4712_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_4724_cast_fp16 = layer_norm(axes = var_4724_axes_0, epsilon = var_4712_to_fp16, gamma = model_model_layers_6_self_attn_q_norm_weight_to_fp16, x = input_113)[name = string("op_4724_cast_fp16")]; tensor mean_53_axes_0 = const()[name = string("mean_53_axes_0"), val = tensor([-1])]; bool mean_53_keep_dims_0 = const()[name = string("mean_53_keep_dims_0"), val = bool(true)]; tensor x_123 = transpose(perm = var_4696, x = var_4691)[name = string("transpose_195")]; tensor mean_53 = reduce_mean(axes = mean_53_axes_0, keep_dims = mean_53_keep_dims_0, x = x_123)[name = string("mean_53")]; tensor input_115 = sub(x = x_123, y = mean_53)[name = string("input_115")]; tensor var_4742_axes_0 = const()[name = string("op_4742_axes_0"), val = tensor([-1])]; tensor model_model_layers_6_self_attn_k_norm_weight_to_fp16 = const()[name = string("model_model_layers_6_self_attn_k_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1029847872)))]; fp16 var_4730_to_fp16 = const()[name = string("op_4730_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_4742_cast_fp16 = layer_norm(axes = var_4742_axes_0, epsilon = var_4730_to_fp16, gamma = model_model_layers_6_self_attn_k_norm_weight_to_fp16, x = input_115)[name = string("op_4742_cast_fp16")]; tensor var_4757 = mul(x = var_4724_cast_fp16, y = cos_5)[name = string("op_4757")]; tensor x1_25_begin_0 = const()[name = string("x1_25_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_25_end_0 = const()[name = string("x1_25_end_0"), val = tensor([1, 16, 64, 64])]; tensor x1_25_end_mask_0 = const()[name = string("x1_25_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_25 = slice_by_index(begin = x1_25_begin_0, end = x1_25_end_0, end_mask = x1_25_end_mask_0, x = var_4724_cast_fp16)[name = string("x1_25")]; tensor x2_25_begin_0 = const()[name = string("x2_25_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_25_end_0 = const()[name = string("x2_25_end_0"), val = tensor([1, 16, 64, 128])]; tensor x2_25_end_mask_0 = const()[name = string("x2_25_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_25 = slice_by_index(begin = x2_25_begin_0, end = x2_25_end_0, end_mask = x2_25_end_mask_0, x = var_4724_cast_fp16)[name = string("x2_25")]; fp16 const_139_promoted = const()[name = string("const_139_promoted"), val = fp16(-0x1p+0)]; tensor var_4778 = mul(x = x2_25, y = const_139_promoted)[name = string("op_4778")]; int32 var_4780 = const()[name = string("op_4780"), val = int32(-1)]; bool var_4781_interleave_0 = const()[name = string("op_4781_interleave_0"), val = bool(false)]; tensor var_4781 = concat(axis = var_4780, interleave = var_4781_interleave_0, values = (var_4778, x1_25))[name = string("op_4781")]; tensor var_4782 = mul(x = var_4781, y = sin_5)[name = string("op_4782")]; tensor query_states_51 = add(x = var_4757, y = var_4782)[name = string("query_states_51")]; tensor var_4785 = mul(x = var_4742_cast_fp16, y = cos_5)[name = string("op_4785")]; tensor x1_27_begin_0 = const()[name = string("x1_27_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_27_end_0 = const()[name = string("x1_27_end_0"), val = tensor([1, 8, 64, 64])]; tensor x1_27_end_mask_0 = const()[name = string("x1_27_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_27 = slice_by_index(begin = x1_27_begin_0, end = x1_27_end_0, end_mask = x1_27_end_mask_0, x = var_4742_cast_fp16)[name = string("x1_27")]; tensor x2_27_begin_0 = const()[name = string("x2_27_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_27_end_0 = const()[name = string("x2_27_end_0"), val = tensor([1, 8, 64, 128])]; tensor x2_27_end_mask_0 = const()[name = string("x2_27_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_27 = slice_by_index(begin = x2_27_begin_0, end = x2_27_end_0, end_mask = x2_27_end_mask_0, x = var_4742_cast_fp16)[name = string("x2_27")]; fp16 const_142_promoted = const()[name = string("const_142_promoted"), val = fp16(-0x1p+0)]; tensor var_4806 = mul(x = x2_27, y = const_142_promoted)[name = string("op_4806")]; int32 var_4808 = const()[name = string("op_4808"), val = int32(-1)]; bool var_4809_interleave_0 = const()[name = string("op_4809_interleave_0"), val = bool(false)]; tensor var_4809 = concat(axis = var_4808, interleave = var_4809_interleave_0, values = (var_4806, x1_27))[name = string("op_4809")]; tensor var_4810 = mul(x = var_4809, y = sin_5)[name = string("op_4810")]; tensor key_states_63 = add(x = var_4785, y = var_4810)[name = string("key_states_63")]; tensor expand_dims_72 = const()[name = string("expand_dims_72"), val = tensor([6])]; tensor expand_dims_73 = const()[name = string("expand_dims_73"), val = tensor([0])]; tensor expand_dims_75 = const()[name = string("expand_dims_75"), val = tensor([0])]; tensor expand_dims_76 = const()[name = string("expand_dims_76"), val = tensor([7])]; int32 concat_110_axis_0 = const()[name = string("concat_110_axis_0"), val = int32(0)]; bool concat_110_interleave_0 = const()[name = string("concat_110_interleave_0"), val = bool(false)]; tensor concat_110 = concat(axis = concat_110_axis_0, interleave = concat_110_interleave_0, values = (expand_dims_72, expand_dims_73, current_pos, expand_dims_75))[name = string("concat_110")]; tensor concat_111_values1_0 = const()[name = string("concat_111_values1_0"), val = tensor([0])]; tensor concat_111_values3_0 = const()[name = string("concat_111_values3_0"), val = tensor([0])]; int32 concat_111_axis_0 = const()[name = string("concat_111_axis_0"), val = int32(0)]; bool concat_111_interleave_0 = const()[name = string("concat_111_interleave_0"), val = bool(false)]; tensor concat_111 = concat(axis = concat_111_axis_0, interleave = concat_111_interleave_0, values = (expand_dims_76, concat_111_values1_0, var_1760, concat_111_values3_0))[name = string("concat_111")]; tensor model_model_kv_cache_0_internal_tensor_assign_13_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_13_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_13_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_13_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_13_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_13_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_13_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_13_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_13_cast_fp16 = slice_update(begin = concat_110, begin_mask = model_model_kv_cache_0_internal_tensor_assign_13_begin_mask_0, end = concat_111, end_mask = model_model_kv_cache_0_internal_tensor_assign_13_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_13_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_13_stride_0, update = key_states_63, x = coreml_update_state_67)[name = string("model_model_kv_cache_0_internal_tensor_assign_13_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_13_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_68_write_state")]; tensor coreml_update_state_68 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_68")]; tensor expand_dims_78 = const()[name = string("expand_dims_78"), val = tensor([34])]; tensor expand_dims_79 = const()[name = string("expand_dims_79"), val = tensor([0])]; tensor expand_dims_81 = const()[name = string("expand_dims_81"), val = tensor([0])]; tensor expand_dims_82 = const()[name = string("expand_dims_82"), val = tensor([35])]; int32 concat_114_axis_0 = const()[name = string("concat_114_axis_0"), val = int32(0)]; bool concat_114_interleave_0 = const()[name = string("concat_114_interleave_0"), val = bool(false)]; tensor concat_114 = concat(axis = concat_114_axis_0, interleave = concat_114_interleave_0, values = (expand_dims_78, expand_dims_79, current_pos, expand_dims_81))[name = string("concat_114")]; tensor concat_115_values1_0 = const()[name = string("concat_115_values1_0"), val = tensor([0])]; tensor concat_115_values3_0 = const()[name = string("concat_115_values3_0"), val = tensor([0])]; int32 concat_115_axis_0 = const()[name = string("concat_115_axis_0"), val = int32(0)]; bool concat_115_interleave_0 = const()[name = string("concat_115_interleave_0"), val = bool(false)]; tensor concat_115 = concat(axis = concat_115_axis_0, interleave = concat_115_interleave_0, values = (expand_dims_82, concat_115_values1_0, var_1760, concat_115_values3_0))[name = string("concat_115")]; tensor model_model_kv_cache_0_internal_tensor_assign_14_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_14_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_14_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_14_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_14_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_14_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_14_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_14_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_51 = transpose(perm = var_4707, x = var_4702)[name = string("transpose_194")]; tensor model_model_kv_cache_0_internal_tensor_assign_14_cast_fp16 = slice_update(begin = concat_114, begin_mask = model_model_kv_cache_0_internal_tensor_assign_14_begin_mask_0, end = concat_115, end_mask = model_model_kv_cache_0_internal_tensor_assign_14_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_14_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_14_stride_0, update = value_states_51, x = coreml_update_state_68)[name = string("model_model_kv_cache_0_internal_tensor_assign_14_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_14_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_69_write_state")]; tensor coreml_update_state_69 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_69")]; tensor var_4881_begin_0 = const()[name = string("op_4881_begin_0"), val = tensor([6, 0, 0, 0])]; tensor var_4881_end_0 = const()[name = string("op_4881_end_0"), val = tensor([7, 8, 1024, 128])]; tensor var_4881_end_mask_0 = const()[name = string("op_4881_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_4881_cast_fp16 = slice_by_index(begin = var_4881_begin_0, end = var_4881_end_0, end_mask = var_4881_end_mask_0, x = coreml_update_state_69)[name = string("op_4881_cast_fp16")]; tensor K_layer_cache_13_axes_0 = const()[name = string("K_layer_cache_13_axes_0"), val = tensor([0])]; tensor K_layer_cache_13_cast_fp16 = squeeze(axes = K_layer_cache_13_axes_0, x = var_4881_cast_fp16)[name = string("K_layer_cache_13_cast_fp16")]; tensor var_4888_begin_0 = const()[name = string("op_4888_begin_0"), val = tensor([34, 0, 0, 0])]; tensor var_4888_end_0 = const()[name = string("op_4888_end_0"), val = tensor([35, 8, 1024, 128])]; tensor var_4888_end_mask_0 = const()[name = string("op_4888_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_4888_cast_fp16 = slice_by_index(begin = var_4888_begin_0, end = var_4888_end_0, end_mask = var_4888_end_mask_0, x = coreml_update_state_69)[name = string("op_4888_cast_fp16")]; tensor V_layer_cache_13_axes_0 = const()[name = string("V_layer_cache_13_axes_0"), val = tensor([0])]; tensor V_layer_cache_13_cast_fp16 = squeeze(axes = V_layer_cache_13_axes_0, x = var_4888_cast_fp16)[name = string("V_layer_cache_13_cast_fp16")]; tensor x_127_axes_0 = const()[name = string("x_127_axes_0"), val = tensor([1])]; tensor x_127_cast_fp16 = expand_dims(axes = x_127_axes_0, x = K_layer_cache_13_cast_fp16)[name = string("x_127_cast_fp16")]; tensor var_4917 = const()[name = string("op_4917"), val = tensor([1, 2, 1, 1])]; tensor x_129_cast_fp16 = tile(reps = var_4917, x = x_127_cast_fp16)[name = string("x_129_cast_fp16")]; tensor var_4929 = const()[name = string("op_4929"), val = tensor([1, -1, 1024, 128])]; tensor key_states_67_cast_fp16 = reshape(shape = var_4929, x = x_129_cast_fp16)[name = string("key_states_67_cast_fp16")]; tensor x_133_axes_0 = const()[name = string("x_133_axes_0"), val = tensor([1])]; tensor x_133_cast_fp16 = expand_dims(axes = x_133_axes_0, x = V_layer_cache_13_cast_fp16)[name = string("x_133_cast_fp16")]; tensor var_4937 = const()[name = string("op_4937"), val = tensor([1, 2, 1, 1])]; tensor x_135_cast_fp16 = tile(reps = var_4937, x = x_133_cast_fp16)[name = string("x_135_cast_fp16")]; bool var_4964_transpose_x_0 = const()[name = string("op_4964_transpose_x_0"), val = bool(false)]; bool var_4964_transpose_y_0 = const()[name = string("op_4964_transpose_y_0"), val = bool(true)]; tensor var_4964 = matmul(transpose_x = var_4964_transpose_x_0, transpose_y = var_4964_transpose_y_0, x = query_states_51, y = key_states_67_cast_fp16)[name = string("op_4964")]; fp16 var_4965_to_fp16 = const()[name = string("op_4965_to_fp16"), val = fp16(0x1.6ap-4)]; tensor attn_weights_25_cast_fp16 = mul(x = var_4964, y = var_4965_to_fp16)[name = string("attn_weights_25_cast_fp16")]; tensor attn_weights_27_cast_fp16 = add(x = attn_weights_25_cast_fp16, y = causal_mask)[name = string("attn_weights_27_cast_fp16")]; int32 var_5000 = const()[name = string("op_5000"), val = int32(-1)]; tensor var_5002_cast_fp16 = softmax(axis = var_5000, x = attn_weights_27_cast_fp16)[name = string("op_5002_cast_fp16")]; tensor concat_120 = const()[name = string("concat_120"), val = tensor([16, 64, 1024])]; tensor reshape_18_cast_fp16 = reshape(shape = concat_120, x = var_5002_cast_fp16)[name = string("reshape_18_cast_fp16")]; tensor concat_121 = const()[name = string("concat_121"), val = tensor([16, 1024, 128])]; tensor reshape_19_cast_fp16 = reshape(shape = concat_121, x = x_135_cast_fp16)[name = string("reshape_19_cast_fp16")]; bool matmul_6_transpose_x_0 = const()[name = string("matmul_6_transpose_x_0"), val = bool(false)]; bool matmul_6_transpose_y_0 = const()[name = string("matmul_6_transpose_y_0"), val = bool(false)]; tensor matmul_6_cast_fp16 = matmul(transpose_x = matmul_6_transpose_x_0, transpose_y = matmul_6_transpose_y_0, x = reshape_18_cast_fp16, y = reshape_19_cast_fp16)[name = string("matmul_6_cast_fp16")]; tensor concat_125 = const()[name = string("concat_125"), val = tensor([1, 16, 64, 128])]; tensor reshape_20_cast_fp16 = reshape(shape = concat_125, x = matmul_6_cast_fp16)[name = string("reshape_20_cast_fp16")]; tensor var_5014_perm_0 = const()[name = string("op_5014_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_5033 = const()[name = string("op_5033"), val = tensor([1, 64, 2048])]; tensor var_5014_cast_fp16 = transpose(perm = var_5014_perm_0, x = reshape_20_cast_fp16)[name = string("transpose_193")]; tensor attn_output_65_cast_fp16 = reshape(shape = var_5033, x = var_5014_cast_fp16)[name = string("attn_output_65_cast_fp16")]; tensor var_5038 = const()[name = string("op_5038"), val = tensor([0, 2, 1])]; string var_5054_pad_type_0 = const()[name = string("op_5054_pad_type_0"), val = string("valid")]; int32 var_5054_groups_0 = const()[name = string("op_5054_groups_0"), val = int32(1)]; tensor var_5054_strides_0 = const()[name = string("op_5054_strides_0"), val = tensor([1])]; tensor var_5054_pad_0 = const()[name = string("op_5054_pad_0"), val = tensor([0, 0])]; tensor var_5054_dilations_0 = const()[name = string("op_5054_dilations_0"), val = tensor([1])]; tensor squeeze_6_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1029848192))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1032993984))))[name = string("squeeze_6_cast_fp16_to_fp32_to_fp16_palettized")]; tensor var_5039_cast_fp16 = transpose(perm = var_5038, x = attn_output_65_cast_fp16)[name = string("transpose_192")]; tensor var_5054_cast_fp16 = conv(dilations = var_5054_dilations_0, groups = var_5054_groups_0, pad = var_5054_pad_0, pad_type = var_5054_pad_type_0, strides = var_5054_strides_0, weight = squeeze_6_cast_fp16_to_fp32_to_fp16_palettized, x = var_5039_cast_fp16)[name = string("op_5054_cast_fp16")]; tensor var_5058 = const()[name = string("op_5058"), val = tensor([0, 2, 1])]; tensor attn_output_69_cast_fp16 = transpose(perm = var_5058, x = var_5054_cast_fp16)[name = string("transpose_191")]; tensor hidden_states_41_cast_fp16 = add(x = hidden_states_37_cast_fp16, y = attn_output_69_cast_fp16)[name = string("hidden_states_41_cast_fp16")]; tensor mean_55_axes_0 = const()[name = string("mean_55_axes_0"), val = tensor([-1])]; bool mean_55_keep_dims_0 = const()[name = string("mean_55_keep_dims_0"), val = bool(true)]; tensor mean_55_cast_fp16 = reduce_mean(axes = mean_55_axes_0, keep_dims = mean_55_keep_dims_0, x = hidden_states_41_cast_fp16)[name = string("mean_55_cast_fp16")]; tensor input_119_cast_fp16 = sub(x = hidden_states_41_cast_fp16, y = mean_55_cast_fp16)[name = string("input_119_cast_fp16")]; tensor var_5077_axes_0 = const()[name = string("op_5077_axes_0"), val = tensor([-1])]; tensor model_model_layers_6_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_6_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1033026816)))]; fp16 var_5065_to_fp16 = const()[name = string("op_5065_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_5077_cast_fp16 = layer_norm(axes = var_5077_axes_0, epsilon = var_5065_to_fp16, gamma = model_model_layers_6_post_attention_layernorm_weight_to_fp16, x = input_119_cast_fp16)[name = string("op_5077_cast_fp16")]; tensor var_5091 = const()[name = string("op_5091"), val = tensor([0, 2, 1])]; tensor input_121_axes_0 = const()[name = string("input_121_axes_0"), val = tensor([2])]; tensor var_5092 = transpose(perm = var_5091, x = var_5077_cast_fp16)[name = string("transpose_190")]; tensor input_121 = expand_dims(axes = input_121_axes_0, x = var_5092)[name = string("input_121")]; string input_123_pad_type_0 = const()[name = string("input_123_pad_type_0"), val = string("valid")]; tensor input_123_strides_0 = const()[name = string("input_123_strides_0"), val = tensor([1, 1])]; tensor input_123_pad_0 = const()[name = string("input_123_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_123_dilations_0 = const()[name = string("input_123_dilations_0"), val = tensor([1, 1])]; int32 input_123_groups_0 = const()[name = string("input_123_groups_0"), val = int32(1)]; tensor input_123 = conv(dilations = input_123_dilations_0, groups = input_123_groups_0, pad = input_123_pad_0, pad_type = input_123_pad_type_0, strides = input_123_strides_0, weight = model_model_layers_6_mlp_gate_proj_weight_palettized, x = input_121)[name = string("input_123")]; string b_13_pad_type_0 = const()[name = string("b_13_pad_type_0"), val = string("valid")]; tensor b_13_strides_0 = const()[name = string("b_13_strides_0"), val = tensor([1, 1])]; tensor b_13_pad_0 = const()[name = string("b_13_pad_0"), val = tensor([0, 0, 0, 0])]; tensor b_13_dilations_0 = const()[name = string("b_13_dilations_0"), val = tensor([1, 1])]; int32 b_13_groups_0 = const()[name = string("b_13_groups_0"), val = int32(1)]; tensor b_13 = conv(dilations = b_13_dilations_0, groups = b_13_groups_0, pad = b_13_pad_0, pad_type = b_13_pad_type_0, strides = b_13_strides_0, weight = model_model_layers_6_mlp_up_proj_weight_palettized, x = input_121)[name = string("b_13")]; tensor c_13 = silu(x = input_123)[name = string("c_13")]; tensor input_125 = mul(x = c_13, y = b_13)[name = string("input_125")]; string e_13_pad_type_0 = const()[name = string("e_13_pad_type_0"), val = string("valid")]; tensor e_13_strides_0 = const()[name = string("e_13_strides_0"), val = tensor([1, 1])]; tensor e_13_pad_0 = const()[name = string("e_13_pad_0"), val = tensor([0, 0, 0, 0])]; tensor e_13_dilations_0 = const()[name = string("e_13_dilations_0"), val = tensor([1, 1])]; int32 e_13_groups_0 = const()[name = string("e_13_groups_0"), val = int32(1)]; tensor e_13 = conv(dilations = e_13_dilations_0, groups = e_13_groups_0, pad = e_13_pad_0, pad_type = e_13_pad_type_0, strides = e_13_strides_0, weight = model_model_layers_6_mlp_down_proj_weight_palettized, x = input_125)[name = string("e_13")]; tensor var_5114_axes_0 = const()[name = string("op_5114_axes_0"), val = tensor([2])]; tensor var_5114 = squeeze(axes = var_5114_axes_0, x = e_13)[name = string("op_5114")]; tensor var_5115 = const()[name = string("op_5115"), val = tensor([0, 2, 1])]; tensor var_5116 = transpose(perm = var_5115, x = var_5114)[name = string("transpose_189")]; tensor hidden_states_43_cast_fp16 = add(x = hidden_states_41_cast_fp16, y = var_5116)[name = string("hidden_states_43_cast_fp16")]; tensor mean_57_axes_0 = const()[name = string("mean_57_axes_0"), val = tensor([-1])]; bool mean_57_keep_dims_0 = const()[name = string("mean_57_keep_dims_0"), val = bool(true)]; tensor mean_57_cast_fp16 = reduce_mean(axes = mean_57_axes_0, keep_dims = mean_57_keep_dims_0, x = hidden_states_43_cast_fp16)[name = string("mean_57_cast_fp16")]; tensor input_127_cast_fp16 = sub(x = hidden_states_43_cast_fp16, y = mean_57_cast_fp16)[name = string("input_127_cast_fp16")]; tensor var_5134_axes_0 = const()[name = string("op_5134_axes_0"), val = tensor([-1])]; tensor model_model_layers_7_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_7_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1033030976)))]; fp16 var_5122_to_fp16 = const()[name = string("op_5122_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_5134_cast_fp16 = layer_norm(axes = var_5134_axes_0, epsilon = var_5122_to_fp16, gamma = model_model_layers_7_input_layernorm_weight_to_fp16, x = input_127_cast_fp16)[name = string("op_5134_cast_fp16")]; tensor var_5146 = const()[name = string("op_5146"), val = tensor([0, 2, 1])]; tensor var_5149_axes_0 = const()[name = string("op_5149_axes_0"), val = tensor([2])]; tensor var_5147 = transpose(perm = var_5146, x = var_5134_cast_fp16)[name = string("transpose_188")]; tensor var_5149 = expand_dims(axes = var_5149_axes_0, x = var_5147)[name = string("op_5149")]; string query_states_57_pad_type_0 = const()[name = string("query_states_57_pad_type_0"), val = string("valid")]; tensor query_states_57_strides_0 = const()[name = string("query_states_57_strides_0"), val = tensor([1, 1])]; tensor query_states_57_pad_0 = const()[name = string("query_states_57_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_57_dilations_0 = const()[name = string("query_states_57_dilations_0"), val = tensor([1, 1])]; int32 query_states_57_groups_0 = const()[name = string("query_states_57_groups_0"), val = int32(1)]; tensor query_states_57 = conv(dilations = query_states_57_dilations_0, groups = query_states_57_groups_0, pad = query_states_57_pad_0, pad_type = query_states_57_pad_type_0, strides = query_states_57_strides_0, weight = model_model_layers_7_self_attn_q_proj_weight_palettized, x = var_5149)[name = string("query_states_57")]; string key_states_71_pad_type_0 = const()[name = string("key_states_71_pad_type_0"), val = string("valid")]; tensor key_states_71_strides_0 = const()[name = string("key_states_71_strides_0"), val = tensor([1, 1])]; tensor key_states_71_pad_0 = const()[name = string("key_states_71_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_71_dilations_0 = const()[name = string("key_states_71_dilations_0"), val = tensor([1, 1])]; int32 key_states_71_groups_0 = const()[name = string("key_states_71_groups_0"), val = int32(1)]; tensor key_states_71 = conv(dilations = key_states_71_dilations_0, groups = key_states_71_groups_0, pad = key_states_71_pad_0, pad_type = key_states_71_pad_type_0, strides = key_states_71_strides_0, weight = model_model_layers_7_self_attn_k_proj_weight_palettized, x = var_5149)[name = string("key_states_71")]; string value_states_57_pad_type_0 = const()[name = string("value_states_57_pad_type_0"), val = string("valid")]; tensor value_states_57_strides_0 = const()[name = string("value_states_57_strides_0"), val = tensor([1, 1])]; tensor value_states_57_pad_0 = const()[name = string("value_states_57_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_57_dilations_0 = const()[name = string("value_states_57_dilations_0"), val = tensor([1, 1])]; int32 value_states_57_groups_0 = const()[name = string("value_states_57_groups_0"), val = int32(1)]; tensor value_states_57 = conv(dilations = value_states_57_dilations_0, groups = value_states_57_groups_0, pad = value_states_57_pad_0, pad_type = value_states_57_pad_type_0, strides = value_states_57_strides_0, weight = model_model_layers_7_self_attn_v_proj_weight_palettized, x = var_5149)[name = string("value_states_57")]; tensor var_5191 = const()[name = string("op_5191"), val = tensor([1, 16, 128, 64])]; tensor var_5192 = reshape(shape = var_5191, x = query_states_57)[name = string("op_5192")]; tensor var_5197 = const()[name = string("op_5197"), val = tensor([0, 1, 3, 2])]; tensor var_5202 = const()[name = string("op_5202"), val = tensor([1, 8, 128, 64])]; tensor var_5203 = reshape(shape = var_5202, x = key_states_71)[name = string("op_5203")]; tensor var_5208 = const()[name = string("op_5208"), val = tensor([0, 1, 3, 2])]; tensor var_5213 = const()[name = string("op_5213"), val = tensor([1, 8, 128, 64])]; tensor var_5214 = reshape(shape = var_5213, x = value_states_57)[name = string("op_5214")]; tensor var_5219 = const()[name = string("op_5219"), val = tensor([0, 1, 3, 2])]; tensor mean_59_axes_0 = const()[name = string("mean_59_axes_0"), val = tensor([-1])]; bool mean_59_keep_dims_0 = const()[name = string("mean_59_keep_dims_0"), val = bool(true)]; tensor x_141 = transpose(perm = var_5197, x = var_5192)[name = string("transpose_187")]; tensor mean_59 = reduce_mean(axes = mean_59_axes_0, keep_dims = mean_59_keep_dims_0, x = x_141)[name = string("mean_59")]; tensor input_131 = sub(x = x_141, y = mean_59)[name = string("input_131")]; tensor var_5236_axes_0 = const()[name = string("op_5236_axes_0"), val = tensor([-1])]; tensor model_model_layers_7_self_attn_q_norm_weight_to_fp16 = const()[name = string("model_model_layers_7_self_attn_q_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1033035136)))]; fp16 var_5224_to_fp16 = const()[name = string("op_5224_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_5236_cast_fp16 = layer_norm(axes = var_5236_axes_0, epsilon = var_5224_to_fp16, gamma = model_model_layers_7_self_attn_q_norm_weight_to_fp16, x = input_131)[name = string("op_5236_cast_fp16")]; tensor mean_61_axes_0 = const()[name = string("mean_61_axes_0"), val = tensor([-1])]; bool mean_61_keep_dims_0 = const()[name = string("mean_61_keep_dims_0"), val = bool(true)]; tensor x_143 = transpose(perm = var_5208, x = var_5203)[name = string("transpose_186")]; tensor mean_61 = reduce_mean(axes = mean_61_axes_0, keep_dims = mean_61_keep_dims_0, x = x_143)[name = string("mean_61")]; tensor input_133 = sub(x = x_143, y = mean_61)[name = string("input_133")]; tensor var_5254_axes_0 = const()[name = string("op_5254_axes_0"), val = tensor([-1])]; tensor model_model_layers_7_self_attn_k_norm_weight_to_fp16 = const()[name = string("model_model_layers_7_self_attn_k_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1033035456)))]; fp16 var_5242_to_fp16 = const()[name = string("op_5242_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_5254_cast_fp16 = layer_norm(axes = var_5254_axes_0, epsilon = var_5242_to_fp16, gamma = model_model_layers_7_self_attn_k_norm_weight_to_fp16, x = input_133)[name = string("op_5254_cast_fp16")]; tensor var_5269 = mul(x = var_5236_cast_fp16, y = cos_5)[name = string("op_5269")]; tensor x1_29_begin_0 = const()[name = string("x1_29_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_29_end_0 = const()[name = string("x1_29_end_0"), val = tensor([1, 16, 64, 64])]; tensor x1_29_end_mask_0 = const()[name = string("x1_29_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_29 = slice_by_index(begin = x1_29_begin_0, end = x1_29_end_0, end_mask = x1_29_end_mask_0, x = var_5236_cast_fp16)[name = string("x1_29")]; tensor x2_29_begin_0 = const()[name = string("x2_29_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_29_end_0 = const()[name = string("x2_29_end_0"), val = tensor([1, 16, 64, 128])]; tensor x2_29_end_mask_0 = const()[name = string("x2_29_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_29 = slice_by_index(begin = x2_29_begin_0, end = x2_29_end_0, end_mask = x2_29_end_mask_0, x = var_5236_cast_fp16)[name = string("x2_29")]; fp16 const_161_promoted = const()[name = string("const_161_promoted"), val = fp16(-0x1p+0)]; tensor var_5290 = mul(x = x2_29, y = const_161_promoted)[name = string("op_5290")]; int32 var_5292 = const()[name = string("op_5292"), val = int32(-1)]; bool var_5293_interleave_0 = const()[name = string("op_5293_interleave_0"), val = bool(false)]; tensor var_5293 = concat(axis = var_5292, interleave = var_5293_interleave_0, values = (var_5290, x1_29))[name = string("op_5293")]; tensor var_5294 = mul(x = var_5293, y = sin_5)[name = string("op_5294")]; tensor query_states_59 = add(x = var_5269, y = var_5294)[name = string("query_states_59")]; tensor var_5297 = mul(x = var_5254_cast_fp16, y = cos_5)[name = string("op_5297")]; tensor x1_31_begin_0 = const()[name = string("x1_31_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_31_end_0 = const()[name = string("x1_31_end_0"), val = tensor([1, 8, 64, 64])]; tensor x1_31_end_mask_0 = const()[name = string("x1_31_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_31 = slice_by_index(begin = x1_31_begin_0, end = x1_31_end_0, end_mask = x1_31_end_mask_0, x = var_5254_cast_fp16)[name = string("x1_31")]; tensor x2_31_begin_0 = const()[name = string("x2_31_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_31_end_0 = const()[name = string("x2_31_end_0"), val = tensor([1, 8, 64, 128])]; tensor x2_31_end_mask_0 = const()[name = string("x2_31_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_31 = slice_by_index(begin = x2_31_begin_0, end = x2_31_end_0, end_mask = x2_31_end_mask_0, x = var_5254_cast_fp16)[name = string("x2_31")]; fp16 const_164_promoted = const()[name = string("const_164_promoted"), val = fp16(-0x1p+0)]; tensor var_5318 = mul(x = x2_31, y = const_164_promoted)[name = string("op_5318")]; int32 var_5320 = const()[name = string("op_5320"), val = int32(-1)]; bool var_5321_interleave_0 = const()[name = string("op_5321_interleave_0"), val = bool(false)]; tensor var_5321 = concat(axis = var_5320, interleave = var_5321_interleave_0, values = (var_5318, x1_31))[name = string("op_5321")]; tensor var_5322 = mul(x = var_5321, y = sin_5)[name = string("op_5322")]; tensor key_states_73 = add(x = var_5297, y = var_5322)[name = string("key_states_73")]; tensor expand_dims_84 = const()[name = string("expand_dims_84"), val = tensor([7])]; tensor expand_dims_85 = const()[name = string("expand_dims_85"), val = tensor([0])]; tensor expand_dims_87 = const()[name = string("expand_dims_87"), val = tensor([0])]; tensor expand_dims_88 = const()[name = string("expand_dims_88"), val = tensor([8])]; int32 concat_128_axis_0 = const()[name = string("concat_128_axis_0"), val = int32(0)]; bool concat_128_interleave_0 = const()[name = string("concat_128_interleave_0"), val = bool(false)]; tensor concat_128 = concat(axis = concat_128_axis_0, interleave = concat_128_interleave_0, values = (expand_dims_84, expand_dims_85, current_pos, expand_dims_87))[name = string("concat_128")]; tensor concat_129_values1_0 = const()[name = string("concat_129_values1_0"), val = tensor([0])]; tensor concat_129_values3_0 = const()[name = string("concat_129_values3_0"), val = tensor([0])]; int32 concat_129_axis_0 = const()[name = string("concat_129_axis_0"), val = int32(0)]; bool concat_129_interleave_0 = const()[name = string("concat_129_interleave_0"), val = bool(false)]; tensor concat_129 = concat(axis = concat_129_axis_0, interleave = concat_129_interleave_0, values = (expand_dims_88, concat_129_values1_0, var_1760, concat_129_values3_0))[name = string("concat_129")]; tensor model_model_kv_cache_0_internal_tensor_assign_15_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_15_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_15_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_15_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_15_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_15_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_15_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_15_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_15_cast_fp16 = slice_update(begin = concat_128, begin_mask = model_model_kv_cache_0_internal_tensor_assign_15_begin_mask_0, end = concat_129, end_mask = model_model_kv_cache_0_internal_tensor_assign_15_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_15_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_15_stride_0, update = key_states_73, x = coreml_update_state_69)[name = string("model_model_kv_cache_0_internal_tensor_assign_15_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_15_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_70_write_state")]; tensor coreml_update_state_70 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_70")]; tensor expand_dims_90 = const()[name = string("expand_dims_90"), val = tensor([35])]; tensor expand_dims_91 = const()[name = string("expand_dims_91"), val = tensor([0])]; tensor expand_dims_93 = const()[name = string("expand_dims_93"), val = tensor([0])]; tensor expand_dims_94 = const()[name = string("expand_dims_94"), val = tensor([36])]; int32 concat_132_axis_0 = const()[name = string("concat_132_axis_0"), val = int32(0)]; bool concat_132_interleave_0 = const()[name = string("concat_132_interleave_0"), val = bool(false)]; tensor concat_132 = concat(axis = concat_132_axis_0, interleave = concat_132_interleave_0, values = (expand_dims_90, expand_dims_91, current_pos, expand_dims_93))[name = string("concat_132")]; tensor concat_133_values1_0 = const()[name = string("concat_133_values1_0"), val = tensor([0])]; tensor concat_133_values3_0 = const()[name = string("concat_133_values3_0"), val = tensor([0])]; int32 concat_133_axis_0 = const()[name = string("concat_133_axis_0"), val = int32(0)]; bool concat_133_interleave_0 = const()[name = string("concat_133_interleave_0"), val = bool(false)]; tensor concat_133 = concat(axis = concat_133_axis_0, interleave = concat_133_interleave_0, values = (expand_dims_94, concat_133_values1_0, var_1760, concat_133_values3_0))[name = string("concat_133")]; tensor model_model_kv_cache_0_internal_tensor_assign_16_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_16_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_16_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_16_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_16_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_16_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_16_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_16_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_59 = transpose(perm = var_5219, x = var_5214)[name = string("transpose_185")]; tensor model_model_kv_cache_0_internal_tensor_assign_16_cast_fp16 = slice_update(begin = concat_132, begin_mask = model_model_kv_cache_0_internal_tensor_assign_16_begin_mask_0, end = concat_133, end_mask = model_model_kv_cache_0_internal_tensor_assign_16_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_16_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_16_stride_0, update = value_states_59, x = coreml_update_state_70)[name = string("model_model_kv_cache_0_internal_tensor_assign_16_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_16_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_71_write_state")]; tensor coreml_update_state_71 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_71")]; tensor var_5393_begin_0 = const()[name = string("op_5393_begin_0"), val = tensor([7, 0, 0, 0])]; tensor var_5393_end_0 = const()[name = string("op_5393_end_0"), val = tensor([8, 8, 1024, 128])]; tensor var_5393_end_mask_0 = const()[name = string("op_5393_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_5393_cast_fp16 = slice_by_index(begin = var_5393_begin_0, end = var_5393_end_0, end_mask = var_5393_end_mask_0, x = coreml_update_state_71)[name = string("op_5393_cast_fp16")]; tensor K_layer_cache_15_axes_0 = const()[name = string("K_layer_cache_15_axes_0"), val = tensor([0])]; tensor K_layer_cache_15_cast_fp16 = squeeze(axes = K_layer_cache_15_axes_0, x = var_5393_cast_fp16)[name = string("K_layer_cache_15_cast_fp16")]; tensor var_5400_begin_0 = const()[name = string("op_5400_begin_0"), val = tensor([35, 0, 0, 0])]; tensor var_5400_end_0 = const()[name = string("op_5400_end_0"), val = tensor([36, 8, 1024, 128])]; tensor var_5400_end_mask_0 = const()[name = string("op_5400_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_5400_cast_fp16 = slice_by_index(begin = var_5400_begin_0, end = var_5400_end_0, end_mask = var_5400_end_mask_0, x = coreml_update_state_71)[name = string("op_5400_cast_fp16")]; tensor V_layer_cache_15_axes_0 = const()[name = string("V_layer_cache_15_axes_0"), val = tensor([0])]; tensor V_layer_cache_15_cast_fp16 = squeeze(axes = V_layer_cache_15_axes_0, x = var_5400_cast_fp16)[name = string("V_layer_cache_15_cast_fp16")]; tensor x_147_axes_0 = const()[name = string("x_147_axes_0"), val = tensor([1])]; tensor x_147_cast_fp16 = expand_dims(axes = x_147_axes_0, x = K_layer_cache_15_cast_fp16)[name = string("x_147_cast_fp16")]; tensor var_5429 = const()[name = string("op_5429"), val = tensor([1, 2, 1, 1])]; tensor x_149_cast_fp16 = tile(reps = var_5429, x = x_147_cast_fp16)[name = string("x_149_cast_fp16")]; tensor var_5441 = const()[name = string("op_5441"), val = tensor([1, -1, 1024, 128])]; tensor key_states_77_cast_fp16 = reshape(shape = var_5441, x = x_149_cast_fp16)[name = string("key_states_77_cast_fp16")]; tensor x_153_axes_0 = const()[name = string("x_153_axes_0"), val = tensor([1])]; tensor x_153_cast_fp16 = expand_dims(axes = x_153_axes_0, x = V_layer_cache_15_cast_fp16)[name = string("x_153_cast_fp16")]; tensor var_5449 = const()[name = string("op_5449"), val = tensor([1, 2, 1, 1])]; tensor x_155_cast_fp16 = tile(reps = var_5449, x = x_153_cast_fp16)[name = string("x_155_cast_fp16")]; bool var_5476_transpose_x_0 = const()[name = string("op_5476_transpose_x_0"), val = bool(false)]; bool var_5476_transpose_y_0 = const()[name = string("op_5476_transpose_y_0"), val = bool(true)]; tensor var_5476 = matmul(transpose_x = var_5476_transpose_x_0, transpose_y = var_5476_transpose_y_0, x = query_states_59, y = key_states_77_cast_fp16)[name = string("op_5476")]; fp16 var_5477_to_fp16 = const()[name = string("op_5477_to_fp16"), val = fp16(0x1.6ap-4)]; tensor attn_weights_29_cast_fp16 = mul(x = var_5476, y = var_5477_to_fp16)[name = string("attn_weights_29_cast_fp16")]; tensor attn_weights_31_cast_fp16 = add(x = attn_weights_29_cast_fp16, y = causal_mask)[name = string("attn_weights_31_cast_fp16")]; int32 var_5512 = const()[name = string("op_5512"), val = int32(-1)]; tensor var_5514_cast_fp16 = softmax(axis = var_5512, x = attn_weights_31_cast_fp16)[name = string("op_5514_cast_fp16")]; tensor concat_138 = const()[name = string("concat_138"), val = tensor([16, 64, 1024])]; tensor reshape_21_cast_fp16 = reshape(shape = concat_138, x = var_5514_cast_fp16)[name = string("reshape_21_cast_fp16")]; tensor concat_139 = const()[name = string("concat_139"), val = tensor([16, 1024, 128])]; tensor reshape_22_cast_fp16 = reshape(shape = concat_139, x = x_155_cast_fp16)[name = string("reshape_22_cast_fp16")]; bool matmul_7_transpose_x_0 = const()[name = string("matmul_7_transpose_x_0"), val = bool(false)]; bool matmul_7_transpose_y_0 = const()[name = string("matmul_7_transpose_y_0"), val = bool(false)]; tensor matmul_7_cast_fp16 = matmul(transpose_x = matmul_7_transpose_x_0, transpose_y = matmul_7_transpose_y_0, x = reshape_21_cast_fp16, y = reshape_22_cast_fp16)[name = string("matmul_7_cast_fp16")]; tensor concat_143 = const()[name = string("concat_143"), val = tensor([1, 16, 64, 128])]; tensor reshape_23_cast_fp16 = reshape(shape = concat_143, x = matmul_7_cast_fp16)[name = string("reshape_23_cast_fp16")]; tensor var_5526_perm_0 = const()[name = string("op_5526_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_5545 = const()[name = string("op_5545"), val = tensor([1, 64, 2048])]; tensor var_5526_cast_fp16 = transpose(perm = var_5526_perm_0, x = reshape_23_cast_fp16)[name = string("transpose_184")]; tensor attn_output_75_cast_fp16 = reshape(shape = var_5545, x = var_5526_cast_fp16)[name = string("attn_output_75_cast_fp16")]; tensor var_5550 = const()[name = string("op_5550"), val = tensor([0, 2, 1])]; string var_5566_pad_type_0 = const()[name = string("op_5566_pad_type_0"), val = string("valid")]; int32 var_5566_groups_0 = const()[name = string("op_5566_groups_0"), val = int32(1)]; tensor var_5566_strides_0 = const()[name = string("op_5566_strides_0"), val = tensor([1])]; tensor var_5566_pad_0 = const()[name = string("op_5566_pad_0"), val = tensor([0, 0])]; tensor var_5566_dilations_0 = const()[name = string("op_5566_dilations_0"), val = tensor([1])]; tensor squeeze_7_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1033035776))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1036181568))))[name = string("squeeze_7_cast_fp16_to_fp32_to_fp16_palettized")]; tensor var_5551_cast_fp16 = transpose(perm = var_5550, x = attn_output_75_cast_fp16)[name = string("transpose_183")]; tensor var_5566_cast_fp16 = conv(dilations = var_5566_dilations_0, groups = var_5566_groups_0, pad = var_5566_pad_0, pad_type = var_5566_pad_type_0, strides = var_5566_strides_0, weight = squeeze_7_cast_fp16_to_fp32_to_fp16_palettized, x = var_5551_cast_fp16)[name = string("op_5566_cast_fp16")]; tensor var_5570 = const()[name = string("op_5570"), val = tensor([0, 2, 1])]; tensor attn_output_79_cast_fp16 = transpose(perm = var_5570, x = var_5566_cast_fp16)[name = string("transpose_182")]; tensor hidden_states_47_cast_fp16 = add(x = hidden_states_43_cast_fp16, y = attn_output_79_cast_fp16)[name = string("hidden_states_47_cast_fp16")]; tensor mean_63_axes_0 = const()[name = string("mean_63_axes_0"), val = tensor([-1])]; bool mean_63_keep_dims_0 = const()[name = string("mean_63_keep_dims_0"), val = bool(true)]; tensor mean_63_cast_fp16 = reduce_mean(axes = mean_63_axes_0, keep_dims = mean_63_keep_dims_0, x = hidden_states_47_cast_fp16)[name = string("mean_63_cast_fp16")]; tensor input_137_cast_fp16 = sub(x = hidden_states_47_cast_fp16, y = mean_63_cast_fp16)[name = string("input_137_cast_fp16")]; tensor var_5589_axes_0 = const()[name = string("op_5589_axes_0"), val = tensor([-1])]; tensor model_model_layers_7_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_7_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1036214400)))]; fp16 var_5577_to_fp16 = const()[name = string("op_5577_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_5589_cast_fp16 = layer_norm(axes = var_5589_axes_0, epsilon = var_5577_to_fp16, gamma = model_model_layers_7_post_attention_layernorm_weight_to_fp16, x = input_137_cast_fp16)[name = string("op_5589_cast_fp16")]; tensor var_5603 = const()[name = string("op_5603"), val = tensor([0, 2, 1])]; tensor input_139_axes_0 = const()[name = string("input_139_axes_0"), val = tensor([2])]; tensor var_5604 = transpose(perm = var_5603, x = var_5589_cast_fp16)[name = string("transpose_181")]; tensor input_139 = expand_dims(axes = input_139_axes_0, x = var_5604)[name = string("input_139")]; string input_141_pad_type_0 = const()[name = string("input_141_pad_type_0"), val = string("valid")]; tensor input_141_strides_0 = const()[name = string("input_141_strides_0"), val = tensor([1, 1])]; tensor input_141_pad_0 = const()[name = string("input_141_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_141_dilations_0 = const()[name = string("input_141_dilations_0"), val = tensor([1, 1])]; int32 input_141_groups_0 = const()[name = string("input_141_groups_0"), val = int32(1)]; tensor input_141 = conv(dilations = input_141_dilations_0, groups = input_141_groups_0, pad = input_141_pad_0, pad_type = input_141_pad_type_0, strides = input_141_strides_0, weight = model_model_layers_7_mlp_gate_proj_weight_palettized, x = input_139)[name = string("input_141")]; string b_15_pad_type_0 = const()[name = string("b_15_pad_type_0"), val = string("valid")]; tensor b_15_strides_0 = const()[name = string("b_15_strides_0"), val = tensor([1, 1])]; tensor b_15_pad_0 = const()[name = string("b_15_pad_0"), val = tensor([0, 0, 0, 0])]; tensor b_15_dilations_0 = const()[name = string("b_15_dilations_0"), val = tensor([1, 1])]; int32 b_15_groups_0 = const()[name = string("b_15_groups_0"), val = int32(1)]; tensor b_15 = conv(dilations = b_15_dilations_0, groups = b_15_groups_0, pad = b_15_pad_0, pad_type = b_15_pad_type_0, strides = b_15_strides_0, weight = model_model_layers_7_mlp_up_proj_weight_palettized, x = input_139)[name = string("b_15")]; tensor c_15 = silu(x = input_141)[name = string("c_15")]; tensor input_143 = mul(x = c_15, y = b_15)[name = string("input_143")]; string e_15_pad_type_0 = const()[name = string("e_15_pad_type_0"), val = string("valid")]; tensor e_15_strides_0 = const()[name = string("e_15_strides_0"), val = tensor([1, 1])]; tensor e_15_pad_0 = const()[name = string("e_15_pad_0"), val = tensor([0, 0, 0, 0])]; tensor e_15_dilations_0 = const()[name = string("e_15_dilations_0"), val = tensor([1, 1])]; int32 e_15_groups_0 = const()[name = string("e_15_groups_0"), val = int32(1)]; tensor e_15 = conv(dilations = e_15_dilations_0, groups = e_15_groups_0, pad = e_15_pad_0, pad_type = e_15_pad_type_0, strides = e_15_strides_0, weight = model_model_layers_7_mlp_down_proj_weight_palettized, x = input_143)[name = string("e_15")]; tensor var_5626_axes_0 = const()[name = string("op_5626_axes_0"), val = tensor([2])]; tensor var_5626 = squeeze(axes = var_5626_axes_0, x = e_15)[name = string("op_5626")]; tensor var_5627 = const()[name = string("op_5627"), val = tensor([0, 2, 1])]; tensor var_5628 = transpose(perm = var_5627, x = var_5626)[name = string("transpose_180")]; tensor hidden_states_49_cast_fp16 = add(x = hidden_states_47_cast_fp16, y = var_5628)[name = string("hidden_states_49_cast_fp16")]; tensor mean_65_axes_0 = const()[name = string("mean_65_axes_0"), val = tensor([-1])]; bool mean_65_keep_dims_0 = const()[name = string("mean_65_keep_dims_0"), val = bool(true)]; tensor mean_65_cast_fp16 = reduce_mean(axes = mean_65_axes_0, keep_dims = mean_65_keep_dims_0, x = hidden_states_49_cast_fp16)[name = string("mean_65_cast_fp16")]; tensor input_145_cast_fp16 = sub(x = hidden_states_49_cast_fp16, y = mean_65_cast_fp16)[name = string("input_145_cast_fp16")]; tensor var_5646_axes_0 = const()[name = string("op_5646_axes_0"), val = tensor([-1])]; tensor model_model_layers_8_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_8_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1036218560)))]; fp16 var_5634_to_fp16 = const()[name = string("op_5634_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_5646_cast_fp16 = layer_norm(axes = var_5646_axes_0, epsilon = var_5634_to_fp16, gamma = model_model_layers_8_input_layernorm_weight_to_fp16, x = input_145_cast_fp16)[name = string("op_5646_cast_fp16")]; tensor var_5658 = const()[name = string("op_5658"), val = tensor([0, 2, 1])]; tensor var_5661_axes_0 = const()[name = string("op_5661_axes_0"), val = tensor([2])]; tensor var_5659 = transpose(perm = var_5658, x = var_5646_cast_fp16)[name = string("transpose_179")]; tensor var_5661 = expand_dims(axes = var_5661_axes_0, x = var_5659)[name = string("op_5661")]; string query_states_65_pad_type_0 = const()[name = string("query_states_65_pad_type_0"), val = string("valid")]; tensor query_states_65_strides_0 = const()[name = string("query_states_65_strides_0"), val = tensor([1, 1])]; tensor query_states_65_pad_0 = const()[name = string("query_states_65_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_65_dilations_0 = const()[name = string("query_states_65_dilations_0"), val = tensor([1, 1])]; int32 query_states_65_groups_0 = const()[name = string("query_states_65_groups_0"), val = int32(1)]; tensor query_states_65 = conv(dilations = query_states_65_dilations_0, groups = query_states_65_groups_0, pad = query_states_65_pad_0, pad_type = query_states_65_pad_type_0, strides = query_states_65_strides_0, weight = model_model_layers_8_self_attn_q_proj_weight_palettized, x = var_5661)[name = string("query_states_65")]; string key_states_81_pad_type_0 = const()[name = string("key_states_81_pad_type_0"), val = string("valid")]; tensor key_states_81_strides_0 = const()[name = string("key_states_81_strides_0"), val = tensor([1, 1])]; tensor key_states_81_pad_0 = const()[name = string("key_states_81_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_81_dilations_0 = const()[name = string("key_states_81_dilations_0"), val = tensor([1, 1])]; int32 key_states_81_groups_0 = const()[name = string("key_states_81_groups_0"), val = int32(1)]; tensor key_states_81 = conv(dilations = key_states_81_dilations_0, groups = key_states_81_groups_0, pad = key_states_81_pad_0, pad_type = key_states_81_pad_type_0, strides = key_states_81_strides_0, weight = model_model_layers_8_self_attn_k_proj_weight_palettized, x = var_5661)[name = string("key_states_81")]; string value_states_65_pad_type_0 = const()[name = string("value_states_65_pad_type_0"), val = string("valid")]; tensor value_states_65_strides_0 = const()[name = string("value_states_65_strides_0"), val = tensor([1, 1])]; tensor value_states_65_pad_0 = const()[name = string("value_states_65_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_65_dilations_0 = const()[name = string("value_states_65_dilations_0"), val = tensor([1, 1])]; int32 value_states_65_groups_0 = const()[name = string("value_states_65_groups_0"), val = int32(1)]; tensor value_states_65 = conv(dilations = value_states_65_dilations_0, groups = value_states_65_groups_0, pad = value_states_65_pad_0, pad_type = value_states_65_pad_type_0, strides = value_states_65_strides_0, weight = model_model_layers_8_self_attn_v_proj_weight_palettized, x = var_5661)[name = string("value_states_65")]; tensor var_5703 = const()[name = string("op_5703"), val = tensor([1, 16, 128, 64])]; tensor var_5704 = reshape(shape = var_5703, x = query_states_65)[name = string("op_5704")]; tensor var_5709 = const()[name = string("op_5709"), val = tensor([0, 1, 3, 2])]; tensor var_5714 = const()[name = string("op_5714"), val = tensor([1, 8, 128, 64])]; tensor var_5715 = reshape(shape = var_5714, x = key_states_81)[name = string("op_5715")]; tensor var_5720 = const()[name = string("op_5720"), val = tensor([0, 1, 3, 2])]; tensor var_5725 = const()[name = string("op_5725"), val = tensor([1, 8, 128, 64])]; tensor var_5726 = reshape(shape = var_5725, x = value_states_65)[name = string("op_5726")]; tensor var_5731 = const()[name = string("op_5731"), val = tensor([0, 1, 3, 2])]; tensor mean_67_axes_0 = const()[name = string("mean_67_axes_0"), val = tensor([-1])]; bool mean_67_keep_dims_0 = const()[name = string("mean_67_keep_dims_0"), val = bool(true)]; tensor x_161 = transpose(perm = var_5709, x = var_5704)[name = string("transpose_178")]; tensor mean_67 = reduce_mean(axes = mean_67_axes_0, keep_dims = mean_67_keep_dims_0, x = x_161)[name = string("mean_67")]; tensor input_149 = sub(x = x_161, y = mean_67)[name = string("input_149")]; tensor var_5748_axes_0 = const()[name = string("op_5748_axes_0"), val = tensor([-1])]; tensor model_model_layers_8_self_attn_q_norm_weight_to_fp16 = const()[name = string("model_model_layers_8_self_attn_q_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1036222720)))]; fp16 var_5736_to_fp16 = const()[name = string("op_5736_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_5748_cast_fp16 = layer_norm(axes = var_5748_axes_0, epsilon = var_5736_to_fp16, gamma = model_model_layers_8_self_attn_q_norm_weight_to_fp16, x = input_149)[name = string("op_5748_cast_fp16")]; tensor mean_69_axes_0 = const()[name = string("mean_69_axes_0"), val = tensor([-1])]; bool mean_69_keep_dims_0 = const()[name = string("mean_69_keep_dims_0"), val = bool(true)]; tensor x_163 = transpose(perm = var_5720, x = var_5715)[name = string("transpose_177")]; tensor mean_69 = reduce_mean(axes = mean_69_axes_0, keep_dims = mean_69_keep_dims_0, x = x_163)[name = string("mean_69")]; tensor input_151 = sub(x = x_163, y = mean_69)[name = string("input_151")]; tensor var_5766_axes_0 = const()[name = string("op_5766_axes_0"), val = tensor([-1])]; tensor model_model_layers_8_self_attn_k_norm_weight_to_fp16 = const()[name = string("model_model_layers_8_self_attn_k_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1036223040)))]; fp16 var_5754_to_fp16 = const()[name = string("op_5754_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_5766_cast_fp16 = layer_norm(axes = var_5766_axes_0, epsilon = var_5754_to_fp16, gamma = model_model_layers_8_self_attn_k_norm_weight_to_fp16, x = input_151)[name = string("op_5766_cast_fp16")]; tensor var_5781 = mul(x = var_5748_cast_fp16, y = cos_5)[name = string("op_5781")]; tensor x1_33_begin_0 = const()[name = string("x1_33_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_33_end_0 = const()[name = string("x1_33_end_0"), val = tensor([1, 16, 64, 64])]; tensor x1_33_end_mask_0 = const()[name = string("x1_33_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_33 = slice_by_index(begin = x1_33_begin_0, end = x1_33_end_0, end_mask = x1_33_end_mask_0, x = var_5748_cast_fp16)[name = string("x1_33")]; tensor x2_33_begin_0 = const()[name = string("x2_33_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_33_end_0 = const()[name = string("x2_33_end_0"), val = tensor([1, 16, 64, 128])]; tensor x2_33_end_mask_0 = const()[name = string("x2_33_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_33 = slice_by_index(begin = x2_33_begin_0, end = x2_33_end_0, end_mask = x2_33_end_mask_0, x = var_5748_cast_fp16)[name = string("x2_33")]; fp16 const_183_promoted = const()[name = string("const_183_promoted"), val = fp16(-0x1p+0)]; tensor var_5802 = mul(x = x2_33, y = const_183_promoted)[name = string("op_5802")]; int32 var_5804 = const()[name = string("op_5804"), val = int32(-1)]; bool var_5805_interleave_0 = const()[name = string("op_5805_interleave_0"), val = bool(false)]; tensor var_5805 = concat(axis = var_5804, interleave = var_5805_interleave_0, values = (var_5802, x1_33))[name = string("op_5805")]; tensor var_5806 = mul(x = var_5805, y = sin_5)[name = string("op_5806")]; tensor query_states_67 = add(x = var_5781, y = var_5806)[name = string("query_states_67")]; tensor var_5809 = mul(x = var_5766_cast_fp16, y = cos_5)[name = string("op_5809")]; tensor x1_35_begin_0 = const()[name = string("x1_35_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_35_end_0 = const()[name = string("x1_35_end_0"), val = tensor([1, 8, 64, 64])]; tensor x1_35_end_mask_0 = const()[name = string("x1_35_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_35 = slice_by_index(begin = x1_35_begin_0, end = x1_35_end_0, end_mask = x1_35_end_mask_0, x = var_5766_cast_fp16)[name = string("x1_35")]; tensor x2_35_begin_0 = const()[name = string("x2_35_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_35_end_0 = const()[name = string("x2_35_end_0"), val = tensor([1, 8, 64, 128])]; tensor x2_35_end_mask_0 = const()[name = string("x2_35_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_35 = slice_by_index(begin = x2_35_begin_0, end = x2_35_end_0, end_mask = x2_35_end_mask_0, x = var_5766_cast_fp16)[name = string("x2_35")]; fp16 const_186_promoted = const()[name = string("const_186_promoted"), val = fp16(-0x1p+0)]; tensor var_5830 = mul(x = x2_35, y = const_186_promoted)[name = string("op_5830")]; int32 var_5832 = const()[name = string("op_5832"), val = int32(-1)]; bool var_5833_interleave_0 = const()[name = string("op_5833_interleave_0"), val = bool(false)]; tensor var_5833 = concat(axis = var_5832, interleave = var_5833_interleave_0, values = (var_5830, x1_35))[name = string("op_5833")]; tensor var_5834 = mul(x = var_5833, y = sin_5)[name = string("op_5834")]; tensor key_states_83 = add(x = var_5809, y = var_5834)[name = string("key_states_83")]; tensor expand_dims_96 = const()[name = string("expand_dims_96"), val = tensor([8])]; tensor expand_dims_97 = const()[name = string("expand_dims_97"), val = tensor([0])]; tensor expand_dims_99 = const()[name = string("expand_dims_99"), val = tensor([0])]; tensor expand_dims_100 = const()[name = string("expand_dims_100"), val = tensor([9])]; int32 concat_146_axis_0 = const()[name = string("concat_146_axis_0"), val = int32(0)]; bool concat_146_interleave_0 = const()[name = string("concat_146_interleave_0"), val = bool(false)]; tensor concat_146 = concat(axis = concat_146_axis_0, interleave = concat_146_interleave_0, values = (expand_dims_96, expand_dims_97, current_pos, expand_dims_99))[name = string("concat_146")]; tensor concat_147_values1_0 = const()[name = string("concat_147_values1_0"), val = tensor([0])]; tensor concat_147_values3_0 = const()[name = string("concat_147_values3_0"), val = tensor([0])]; int32 concat_147_axis_0 = const()[name = string("concat_147_axis_0"), val = int32(0)]; bool concat_147_interleave_0 = const()[name = string("concat_147_interleave_0"), val = bool(false)]; tensor concat_147 = concat(axis = concat_147_axis_0, interleave = concat_147_interleave_0, values = (expand_dims_100, concat_147_values1_0, var_1760, concat_147_values3_0))[name = string("concat_147")]; tensor model_model_kv_cache_0_internal_tensor_assign_17_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_17_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_17_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_17_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_17_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_17_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_17_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_17_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_17_cast_fp16 = slice_update(begin = concat_146, begin_mask = model_model_kv_cache_0_internal_tensor_assign_17_begin_mask_0, end = concat_147, end_mask = model_model_kv_cache_0_internal_tensor_assign_17_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_17_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_17_stride_0, update = key_states_83, x = coreml_update_state_71)[name = string("model_model_kv_cache_0_internal_tensor_assign_17_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_17_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_72_write_state")]; tensor coreml_update_state_72 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_72")]; tensor expand_dims_102 = const()[name = string("expand_dims_102"), val = tensor([36])]; tensor expand_dims_103 = const()[name = string("expand_dims_103"), val = tensor([0])]; tensor expand_dims_105 = const()[name = string("expand_dims_105"), val = tensor([0])]; tensor expand_dims_106 = const()[name = string("expand_dims_106"), val = tensor([37])]; int32 concat_150_axis_0 = const()[name = string("concat_150_axis_0"), val = int32(0)]; bool concat_150_interleave_0 = const()[name = string("concat_150_interleave_0"), val = bool(false)]; tensor concat_150 = concat(axis = concat_150_axis_0, interleave = concat_150_interleave_0, values = (expand_dims_102, expand_dims_103, current_pos, expand_dims_105))[name = string("concat_150")]; tensor concat_151_values1_0 = const()[name = string("concat_151_values1_0"), val = tensor([0])]; tensor concat_151_values3_0 = const()[name = string("concat_151_values3_0"), val = tensor([0])]; int32 concat_151_axis_0 = const()[name = string("concat_151_axis_0"), val = int32(0)]; bool concat_151_interleave_0 = const()[name = string("concat_151_interleave_0"), val = bool(false)]; tensor concat_151 = concat(axis = concat_151_axis_0, interleave = concat_151_interleave_0, values = (expand_dims_106, concat_151_values1_0, var_1760, concat_151_values3_0))[name = string("concat_151")]; tensor model_model_kv_cache_0_internal_tensor_assign_18_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_18_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_18_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_18_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_18_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_18_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_18_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_18_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_67 = transpose(perm = var_5731, x = var_5726)[name = string("transpose_176")]; tensor model_model_kv_cache_0_internal_tensor_assign_18_cast_fp16 = slice_update(begin = concat_150, begin_mask = model_model_kv_cache_0_internal_tensor_assign_18_begin_mask_0, end = concat_151, end_mask = model_model_kv_cache_0_internal_tensor_assign_18_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_18_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_18_stride_0, update = value_states_67, x = coreml_update_state_72)[name = string("model_model_kv_cache_0_internal_tensor_assign_18_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_18_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_73_write_state")]; tensor coreml_update_state_73 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_73")]; tensor var_5905_begin_0 = const()[name = string("op_5905_begin_0"), val = tensor([8, 0, 0, 0])]; tensor var_5905_end_0 = const()[name = string("op_5905_end_0"), val = tensor([9, 8, 1024, 128])]; tensor var_5905_end_mask_0 = const()[name = string("op_5905_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_5905_cast_fp16 = slice_by_index(begin = var_5905_begin_0, end = var_5905_end_0, end_mask = var_5905_end_mask_0, x = coreml_update_state_73)[name = string("op_5905_cast_fp16")]; tensor K_layer_cache_17_axes_0 = const()[name = string("K_layer_cache_17_axes_0"), val = tensor([0])]; tensor K_layer_cache_17_cast_fp16 = squeeze(axes = K_layer_cache_17_axes_0, x = var_5905_cast_fp16)[name = string("K_layer_cache_17_cast_fp16")]; tensor var_5912_begin_0 = const()[name = string("op_5912_begin_0"), val = tensor([36, 0, 0, 0])]; tensor var_5912_end_0 = const()[name = string("op_5912_end_0"), val = tensor([37, 8, 1024, 128])]; tensor var_5912_end_mask_0 = const()[name = string("op_5912_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_5912_cast_fp16 = slice_by_index(begin = var_5912_begin_0, end = var_5912_end_0, end_mask = var_5912_end_mask_0, x = coreml_update_state_73)[name = string("op_5912_cast_fp16")]; tensor V_layer_cache_17_axes_0 = const()[name = string("V_layer_cache_17_axes_0"), val = tensor([0])]; tensor V_layer_cache_17_cast_fp16 = squeeze(axes = V_layer_cache_17_axes_0, x = var_5912_cast_fp16)[name = string("V_layer_cache_17_cast_fp16")]; tensor x_167_axes_0 = const()[name = string("x_167_axes_0"), val = tensor([1])]; tensor x_167_cast_fp16 = expand_dims(axes = x_167_axes_0, x = K_layer_cache_17_cast_fp16)[name = string("x_167_cast_fp16")]; tensor var_5941 = const()[name = string("op_5941"), val = tensor([1, 2, 1, 1])]; tensor x_169_cast_fp16 = tile(reps = var_5941, x = x_167_cast_fp16)[name = string("x_169_cast_fp16")]; tensor var_5953 = const()[name = string("op_5953"), val = tensor([1, -1, 1024, 128])]; tensor key_states_87_cast_fp16 = reshape(shape = var_5953, x = x_169_cast_fp16)[name = string("key_states_87_cast_fp16")]; tensor x_173_axes_0 = const()[name = string("x_173_axes_0"), val = tensor([1])]; tensor x_173_cast_fp16 = expand_dims(axes = x_173_axes_0, x = V_layer_cache_17_cast_fp16)[name = string("x_173_cast_fp16")]; tensor var_5961 = const()[name = string("op_5961"), val = tensor([1, 2, 1, 1])]; tensor x_175_cast_fp16 = tile(reps = var_5961, x = x_173_cast_fp16)[name = string("x_175_cast_fp16")]; bool var_5988_transpose_x_0 = const()[name = string("op_5988_transpose_x_0"), val = bool(false)]; bool var_5988_transpose_y_0 = const()[name = string("op_5988_transpose_y_0"), val = bool(true)]; tensor var_5988 = matmul(transpose_x = var_5988_transpose_x_0, transpose_y = var_5988_transpose_y_0, x = query_states_67, y = key_states_87_cast_fp16)[name = string("op_5988")]; fp16 var_5989_to_fp16 = const()[name = string("op_5989_to_fp16"), val = fp16(0x1.6ap-4)]; tensor attn_weights_33_cast_fp16 = mul(x = var_5988, y = var_5989_to_fp16)[name = string("attn_weights_33_cast_fp16")]; tensor attn_weights_35_cast_fp16 = add(x = attn_weights_33_cast_fp16, y = causal_mask)[name = string("attn_weights_35_cast_fp16")]; int32 var_6024 = const()[name = string("op_6024"), val = int32(-1)]; tensor var_6026_cast_fp16 = softmax(axis = var_6024, x = attn_weights_35_cast_fp16)[name = string("op_6026_cast_fp16")]; tensor concat_156 = const()[name = string("concat_156"), val = tensor([16, 64, 1024])]; tensor reshape_24_cast_fp16 = reshape(shape = concat_156, x = var_6026_cast_fp16)[name = string("reshape_24_cast_fp16")]; tensor concat_157 = const()[name = string("concat_157"), val = tensor([16, 1024, 128])]; tensor reshape_25_cast_fp16 = reshape(shape = concat_157, x = x_175_cast_fp16)[name = string("reshape_25_cast_fp16")]; bool matmul_8_transpose_x_0 = const()[name = string("matmul_8_transpose_x_0"), val = bool(false)]; bool matmul_8_transpose_y_0 = const()[name = string("matmul_8_transpose_y_0"), val = bool(false)]; tensor matmul_8_cast_fp16 = matmul(transpose_x = matmul_8_transpose_x_0, transpose_y = matmul_8_transpose_y_0, x = reshape_24_cast_fp16, y = reshape_25_cast_fp16)[name = string("matmul_8_cast_fp16")]; tensor concat_161 = const()[name = string("concat_161"), val = tensor([1, 16, 64, 128])]; tensor reshape_26_cast_fp16 = reshape(shape = concat_161, x = matmul_8_cast_fp16)[name = string("reshape_26_cast_fp16")]; tensor var_6038_perm_0 = const()[name = string("op_6038_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_6057 = const()[name = string("op_6057"), val = tensor([1, 64, 2048])]; tensor var_6038_cast_fp16 = transpose(perm = var_6038_perm_0, x = reshape_26_cast_fp16)[name = string("transpose_175")]; tensor attn_output_85_cast_fp16 = reshape(shape = var_6057, x = var_6038_cast_fp16)[name = string("attn_output_85_cast_fp16")]; tensor var_6062 = const()[name = string("op_6062"), val = tensor([0, 2, 1])]; string var_6078_pad_type_0 = const()[name = string("op_6078_pad_type_0"), val = string("valid")]; int32 var_6078_groups_0 = const()[name = string("op_6078_groups_0"), val = int32(1)]; tensor var_6078_strides_0 = const()[name = string("op_6078_strides_0"), val = tensor([1])]; tensor var_6078_pad_0 = const()[name = string("op_6078_pad_0"), val = tensor([0, 0])]; tensor var_6078_dilations_0 = const()[name = string("op_6078_dilations_0"), val = tensor([1])]; tensor squeeze_8_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1036223360))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1039369152))))[name = string("squeeze_8_cast_fp16_to_fp32_to_fp16_palettized")]; tensor var_6063_cast_fp16 = transpose(perm = var_6062, x = attn_output_85_cast_fp16)[name = string("transpose_174")]; tensor var_6078_cast_fp16 = conv(dilations = var_6078_dilations_0, groups = var_6078_groups_0, pad = var_6078_pad_0, pad_type = var_6078_pad_type_0, strides = var_6078_strides_0, weight = squeeze_8_cast_fp16_to_fp32_to_fp16_palettized, x = var_6063_cast_fp16)[name = string("op_6078_cast_fp16")]; tensor var_6082 = const()[name = string("op_6082"), val = tensor([0, 2, 1])]; tensor attn_output_89_cast_fp16 = transpose(perm = var_6082, x = var_6078_cast_fp16)[name = string("transpose_173")]; tensor hidden_states_53_cast_fp16 = add(x = hidden_states_49_cast_fp16, y = attn_output_89_cast_fp16)[name = string("hidden_states_53_cast_fp16")]; tensor mean_71_axes_0 = const()[name = string("mean_71_axes_0"), val = tensor([-1])]; bool mean_71_keep_dims_0 = const()[name = string("mean_71_keep_dims_0"), val = bool(true)]; tensor mean_71_cast_fp16 = reduce_mean(axes = mean_71_axes_0, keep_dims = mean_71_keep_dims_0, x = hidden_states_53_cast_fp16)[name = string("mean_71_cast_fp16")]; tensor input_155_cast_fp16 = sub(x = hidden_states_53_cast_fp16, y = mean_71_cast_fp16)[name = string("input_155_cast_fp16")]; tensor var_6101_axes_0 = const()[name = string("op_6101_axes_0"), val = tensor([-1])]; tensor model_model_layers_8_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_8_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1039401984)))]; fp16 var_6089_to_fp16 = const()[name = string("op_6089_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_6101_cast_fp16 = layer_norm(axes = var_6101_axes_0, epsilon = var_6089_to_fp16, gamma = model_model_layers_8_post_attention_layernorm_weight_to_fp16, x = input_155_cast_fp16)[name = string("op_6101_cast_fp16")]; tensor var_6115 = const()[name = string("op_6115"), val = tensor([0, 2, 1])]; tensor input_157_axes_0 = const()[name = string("input_157_axes_0"), val = tensor([2])]; tensor var_6116 = transpose(perm = var_6115, x = var_6101_cast_fp16)[name = string("transpose_172")]; tensor input_157 = expand_dims(axes = input_157_axes_0, x = var_6116)[name = string("input_157")]; string input_159_pad_type_0 = const()[name = string("input_159_pad_type_0"), val = string("valid")]; tensor input_159_strides_0 = const()[name = string("input_159_strides_0"), val = tensor([1, 1])]; tensor input_159_pad_0 = const()[name = string("input_159_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_159_dilations_0 = const()[name = string("input_159_dilations_0"), val = tensor([1, 1])]; int32 input_159_groups_0 = const()[name = string("input_159_groups_0"), val = int32(1)]; tensor input_159 = conv(dilations = input_159_dilations_0, groups = input_159_groups_0, pad = input_159_pad_0, pad_type = input_159_pad_type_0, strides = input_159_strides_0, weight = model_model_layers_8_mlp_gate_proj_weight_palettized, x = input_157)[name = string("input_159")]; string b_17_pad_type_0 = const()[name = string("b_17_pad_type_0"), val = string("valid")]; tensor b_17_strides_0 = const()[name = string("b_17_strides_0"), val = tensor([1, 1])]; tensor b_17_pad_0 = const()[name = string("b_17_pad_0"), val = tensor([0, 0, 0, 0])]; tensor b_17_dilations_0 = const()[name = string("b_17_dilations_0"), val = tensor([1, 1])]; int32 b_17_groups_0 = const()[name = string("b_17_groups_0"), val = int32(1)]; tensor b_17 = conv(dilations = b_17_dilations_0, groups = b_17_groups_0, pad = b_17_pad_0, pad_type = b_17_pad_type_0, strides = b_17_strides_0, weight = model_model_layers_8_mlp_up_proj_weight_palettized, x = input_157)[name = string("b_17")]; tensor c_17 = silu(x = input_159)[name = string("c_17")]; tensor input_161 = mul(x = c_17, y = b_17)[name = string("input_161")]; string e_17_pad_type_0 = const()[name = string("e_17_pad_type_0"), val = string("valid")]; tensor e_17_strides_0 = const()[name = string("e_17_strides_0"), val = tensor([1, 1])]; tensor e_17_pad_0 = const()[name = string("e_17_pad_0"), val = tensor([0, 0, 0, 0])]; tensor e_17_dilations_0 = const()[name = string("e_17_dilations_0"), val = tensor([1, 1])]; int32 e_17_groups_0 = const()[name = string("e_17_groups_0"), val = int32(1)]; tensor e_17 = conv(dilations = e_17_dilations_0, groups = e_17_groups_0, pad = e_17_pad_0, pad_type = e_17_pad_type_0, strides = e_17_strides_0, weight = model_model_layers_8_mlp_down_proj_weight_palettized, x = input_161)[name = string("e_17")]; tensor var_6138_axes_0 = const()[name = string("op_6138_axes_0"), val = tensor([2])]; tensor var_6138 = squeeze(axes = var_6138_axes_0, x = e_17)[name = string("op_6138")]; tensor var_6139 = const()[name = string("op_6139"), val = tensor([0, 2, 1])]; tensor var_6140 = transpose(perm = var_6139, x = var_6138)[name = string("transpose_171")]; tensor hidden_states_55_cast_fp16 = add(x = hidden_states_53_cast_fp16, y = var_6140)[name = string("hidden_states_55_cast_fp16")]; tensor mean_73_axes_0 = const()[name = string("mean_73_axes_0"), val = tensor([-1])]; bool mean_73_keep_dims_0 = const()[name = string("mean_73_keep_dims_0"), val = bool(true)]; tensor mean_73_cast_fp16 = reduce_mean(axes = mean_73_axes_0, keep_dims = mean_73_keep_dims_0, x = hidden_states_55_cast_fp16)[name = string("mean_73_cast_fp16")]; tensor input_163_cast_fp16 = sub(x = hidden_states_55_cast_fp16, y = mean_73_cast_fp16)[name = string("input_163_cast_fp16")]; tensor var_6158_axes_0 = const()[name = string("op_6158_axes_0"), val = tensor([-1])]; tensor model_model_layers_9_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_9_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1039406144)))]; fp16 var_6146_to_fp16 = const()[name = string("op_6146_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_6158_cast_fp16 = layer_norm(axes = var_6158_axes_0, epsilon = var_6146_to_fp16, gamma = model_model_layers_9_input_layernorm_weight_to_fp16, x = input_163_cast_fp16)[name = string("op_6158_cast_fp16")]; tensor var_6170 = const()[name = string("op_6170"), val = tensor([0, 2, 1])]; tensor var_6173_axes_0 = const()[name = string("op_6173_axes_0"), val = tensor([2])]; tensor var_6171 = transpose(perm = var_6170, x = var_6158_cast_fp16)[name = string("transpose_170")]; tensor var_6173 = expand_dims(axes = var_6173_axes_0, x = var_6171)[name = string("op_6173")]; string query_states_73_pad_type_0 = const()[name = string("query_states_73_pad_type_0"), val = string("valid")]; tensor query_states_73_strides_0 = const()[name = string("query_states_73_strides_0"), val = tensor([1, 1])]; tensor query_states_73_pad_0 = const()[name = string("query_states_73_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_73_dilations_0 = const()[name = string("query_states_73_dilations_0"), val = tensor([1, 1])]; int32 query_states_73_groups_0 = const()[name = string("query_states_73_groups_0"), val = int32(1)]; tensor query_states_73 = conv(dilations = query_states_73_dilations_0, groups = query_states_73_groups_0, pad = query_states_73_pad_0, pad_type = query_states_73_pad_type_0, strides = query_states_73_strides_0, weight = model_model_layers_9_self_attn_q_proj_weight_palettized, x = var_6173)[name = string("query_states_73")]; string key_states_91_pad_type_0 = const()[name = string("key_states_91_pad_type_0"), val = string("valid")]; tensor key_states_91_strides_0 = const()[name = string("key_states_91_strides_0"), val = tensor([1, 1])]; tensor key_states_91_pad_0 = const()[name = string("key_states_91_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_91_dilations_0 = const()[name = string("key_states_91_dilations_0"), val = tensor([1, 1])]; int32 key_states_91_groups_0 = const()[name = string("key_states_91_groups_0"), val = int32(1)]; tensor key_states_91 = conv(dilations = key_states_91_dilations_0, groups = key_states_91_groups_0, pad = key_states_91_pad_0, pad_type = key_states_91_pad_type_0, strides = key_states_91_strides_0, weight = model_model_layers_9_self_attn_k_proj_weight_palettized, x = var_6173)[name = string("key_states_91")]; string value_states_73_pad_type_0 = const()[name = string("value_states_73_pad_type_0"), val = string("valid")]; tensor value_states_73_strides_0 = const()[name = string("value_states_73_strides_0"), val = tensor([1, 1])]; tensor value_states_73_pad_0 = const()[name = string("value_states_73_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_73_dilations_0 = const()[name = string("value_states_73_dilations_0"), val = tensor([1, 1])]; int32 value_states_73_groups_0 = const()[name = string("value_states_73_groups_0"), val = int32(1)]; tensor value_states_73 = conv(dilations = value_states_73_dilations_0, groups = value_states_73_groups_0, pad = value_states_73_pad_0, pad_type = value_states_73_pad_type_0, strides = value_states_73_strides_0, weight = model_model_layers_9_self_attn_v_proj_weight_palettized, x = var_6173)[name = string("value_states_73")]; tensor var_6215 = const()[name = string("op_6215"), val = tensor([1, 16, 128, 64])]; tensor var_6216 = reshape(shape = var_6215, x = query_states_73)[name = string("op_6216")]; tensor var_6221 = const()[name = string("op_6221"), val = tensor([0, 1, 3, 2])]; tensor var_6226 = const()[name = string("op_6226"), val = tensor([1, 8, 128, 64])]; tensor var_6227 = reshape(shape = var_6226, x = key_states_91)[name = string("op_6227")]; tensor var_6232 = const()[name = string("op_6232"), val = tensor([0, 1, 3, 2])]; tensor var_6237 = const()[name = string("op_6237"), val = tensor([1, 8, 128, 64])]; tensor var_6238 = reshape(shape = var_6237, x = value_states_73)[name = string("op_6238")]; tensor var_6243 = const()[name = string("op_6243"), val = tensor([0, 1, 3, 2])]; tensor mean_75_axes_0 = const()[name = string("mean_75_axes_0"), val = tensor([-1])]; bool mean_75_keep_dims_0 = const()[name = string("mean_75_keep_dims_0"), val = bool(true)]; tensor x_181 = transpose(perm = var_6221, x = var_6216)[name = string("transpose_169")]; tensor mean_75 = reduce_mean(axes = mean_75_axes_0, keep_dims = mean_75_keep_dims_0, x = x_181)[name = string("mean_75")]; tensor input_167 = sub(x = x_181, y = mean_75)[name = string("input_167")]; tensor var_6260_axes_0 = const()[name = string("op_6260_axes_0"), val = tensor([-1])]; tensor model_model_layers_9_self_attn_q_norm_weight_to_fp16 = const()[name = string("model_model_layers_9_self_attn_q_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1039410304)))]; fp16 var_6248_to_fp16 = const()[name = string("op_6248_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_6260_cast_fp16 = layer_norm(axes = var_6260_axes_0, epsilon = var_6248_to_fp16, gamma = model_model_layers_9_self_attn_q_norm_weight_to_fp16, x = input_167)[name = string("op_6260_cast_fp16")]; tensor mean_77_axes_0 = const()[name = string("mean_77_axes_0"), val = tensor([-1])]; bool mean_77_keep_dims_0 = const()[name = string("mean_77_keep_dims_0"), val = bool(true)]; tensor x_183 = transpose(perm = var_6232, x = var_6227)[name = string("transpose_168")]; tensor mean_77 = reduce_mean(axes = mean_77_axes_0, keep_dims = mean_77_keep_dims_0, x = x_183)[name = string("mean_77")]; tensor input_169 = sub(x = x_183, y = mean_77)[name = string("input_169")]; tensor var_6278_axes_0 = const()[name = string("op_6278_axes_0"), val = tensor([-1])]; tensor model_model_layers_9_self_attn_k_norm_weight_to_fp16 = const()[name = string("model_model_layers_9_self_attn_k_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1039410624)))]; fp16 var_6266_to_fp16 = const()[name = string("op_6266_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_6278_cast_fp16 = layer_norm(axes = var_6278_axes_0, epsilon = var_6266_to_fp16, gamma = model_model_layers_9_self_attn_k_norm_weight_to_fp16, x = input_169)[name = string("op_6278_cast_fp16")]; tensor var_6293 = mul(x = var_6260_cast_fp16, y = cos_5)[name = string("op_6293")]; tensor x1_37_begin_0 = const()[name = string("x1_37_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_37_end_0 = const()[name = string("x1_37_end_0"), val = tensor([1, 16, 64, 64])]; tensor x1_37_end_mask_0 = const()[name = string("x1_37_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_37 = slice_by_index(begin = x1_37_begin_0, end = x1_37_end_0, end_mask = x1_37_end_mask_0, x = var_6260_cast_fp16)[name = string("x1_37")]; tensor x2_37_begin_0 = const()[name = string("x2_37_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_37_end_0 = const()[name = string("x2_37_end_0"), val = tensor([1, 16, 64, 128])]; tensor x2_37_end_mask_0 = const()[name = string("x2_37_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_37 = slice_by_index(begin = x2_37_begin_0, end = x2_37_end_0, end_mask = x2_37_end_mask_0, x = var_6260_cast_fp16)[name = string("x2_37")]; fp16 const_205_promoted = const()[name = string("const_205_promoted"), val = fp16(-0x1p+0)]; tensor var_6314 = mul(x = x2_37, y = const_205_promoted)[name = string("op_6314")]; int32 var_6316 = const()[name = string("op_6316"), val = int32(-1)]; bool var_6317_interleave_0 = const()[name = string("op_6317_interleave_0"), val = bool(false)]; tensor var_6317 = concat(axis = var_6316, interleave = var_6317_interleave_0, values = (var_6314, x1_37))[name = string("op_6317")]; tensor var_6318 = mul(x = var_6317, y = sin_5)[name = string("op_6318")]; tensor query_states_75 = add(x = var_6293, y = var_6318)[name = string("query_states_75")]; tensor var_6321 = mul(x = var_6278_cast_fp16, y = cos_5)[name = string("op_6321")]; tensor x1_39_begin_0 = const()[name = string("x1_39_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_39_end_0 = const()[name = string("x1_39_end_0"), val = tensor([1, 8, 64, 64])]; tensor x1_39_end_mask_0 = const()[name = string("x1_39_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_39 = slice_by_index(begin = x1_39_begin_0, end = x1_39_end_0, end_mask = x1_39_end_mask_0, x = var_6278_cast_fp16)[name = string("x1_39")]; tensor x2_39_begin_0 = const()[name = string("x2_39_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_39_end_0 = const()[name = string("x2_39_end_0"), val = tensor([1, 8, 64, 128])]; tensor x2_39_end_mask_0 = const()[name = string("x2_39_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_39 = slice_by_index(begin = x2_39_begin_0, end = x2_39_end_0, end_mask = x2_39_end_mask_0, x = var_6278_cast_fp16)[name = string("x2_39")]; fp16 const_208_promoted = const()[name = string("const_208_promoted"), val = fp16(-0x1p+0)]; tensor var_6342 = mul(x = x2_39, y = const_208_promoted)[name = string("op_6342")]; int32 var_6344 = const()[name = string("op_6344"), val = int32(-1)]; bool var_6345_interleave_0 = const()[name = string("op_6345_interleave_0"), val = bool(false)]; tensor var_6345 = concat(axis = var_6344, interleave = var_6345_interleave_0, values = (var_6342, x1_39))[name = string("op_6345")]; tensor var_6346 = mul(x = var_6345, y = sin_5)[name = string("op_6346")]; tensor key_states_93 = add(x = var_6321, y = var_6346)[name = string("key_states_93")]; tensor expand_dims_108 = const()[name = string("expand_dims_108"), val = tensor([9])]; tensor expand_dims_109 = const()[name = string("expand_dims_109"), val = tensor([0])]; tensor expand_dims_111 = const()[name = string("expand_dims_111"), val = tensor([0])]; tensor expand_dims_112 = const()[name = string("expand_dims_112"), val = tensor([10])]; int32 concat_164_axis_0 = const()[name = string("concat_164_axis_0"), val = int32(0)]; bool concat_164_interleave_0 = const()[name = string("concat_164_interleave_0"), val = bool(false)]; tensor concat_164 = concat(axis = concat_164_axis_0, interleave = concat_164_interleave_0, values = (expand_dims_108, expand_dims_109, current_pos, expand_dims_111))[name = string("concat_164")]; tensor concat_165_values1_0 = const()[name = string("concat_165_values1_0"), val = tensor([0])]; tensor concat_165_values3_0 = const()[name = string("concat_165_values3_0"), val = tensor([0])]; int32 concat_165_axis_0 = const()[name = string("concat_165_axis_0"), val = int32(0)]; bool concat_165_interleave_0 = const()[name = string("concat_165_interleave_0"), val = bool(false)]; tensor concat_165 = concat(axis = concat_165_axis_0, interleave = concat_165_interleave_0, values = (expand_dims_112, concat_165_values1_0, var_1760, concat_165_values3_0))[name = string("concat_165")]; tensor model_model_kv_cache_0_internal_tensor_assign_19_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_19_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_19_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_19_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_19_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_19_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_19_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_19_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_19_cast_fp16 = slice_update(begin = concat_164, begin_mask = model_model_kv_cache_0_internal_tensor_assign_19_begin_mask_0, end = concat_165, end_mask = model_model_kv_cache_0_internal_tensor_assign_19_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_19_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_19_stride_0, update = key_states_93, x = coreml_update_state_73)[name = string("model_model_kv_cache_0_internal_tensor_assign_19_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_19_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_74_write_state")]; tensor coreml_update_state_74 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_74")]; tensor expand_dims_114 = const()[name = string("expand_dims_114"), val = tensor([37])]; tensor expand_dims_115 = const()[name = string("expand_dims_115"), val = tensor([0])]; tensor expand_dims_117 = const()[name = string("expand_dims_117"), val = tensor([0])]; tensor expand_dims_118 = const()[name = string("expand_dims_118"), val = tensor([38])]; int32 concat_168_axis_0 = const()[name = string("concat_168_axis_0"), val = int32(0)]; bool concat_168_interleave_0 = const()[name = string("concat_168_interleave_0"), val = bool(false)]; tensor concat_168 = concat(axis = concat_168_axis_0, interleave = concat_168_interleave_0, values = (expand_dims_114, expand_dims_115, current_pos, expand_dims_117))[name = string("concat_168")]; tensor concat_169_values1_0 = const()[name = string("concat_169_values1_0"), val = tensor([0])]; tensor concat_169_values3_0 = const()[name = string("concat_169_values3_0"), val = tensor([0])]; int32 concat_169_axis_0 = const()[name = string("concat_169_axis_0"), val = int32(0)]; bool concat_169_interleave_0 = const()[name = string("concat_169_interleave_0"), val = bool(false)]; tensor concat_169 = concat(axis = concat_169_axis_0, interleave = concat_169_interleave_0, values = (expand_dims_118, concat_169_values1_0, var_1760, concat_169_values3_0))[name = string("concat_169")]; tensor model_model_kv_cache_0_internal_tensor_assign_20_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_20_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_20_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_20_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_20_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_20_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_20_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_20_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_75 = transpose(perm = var_6243, x = var_6238)[name = string("transpose_167")]; tensor model_model_kv_cache_0_internal_tensor_assign_20_cast_fp16 = slice_update(begin = concat_168, begin_mask = model_model_kv_cache_0_internal_tensor_assign_20_begin_mask_0, end = concat_169, end_mask = model_model_kv_cache_0_internal_tensor_assign_20_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_20_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_20_stride_0, update = value_states_75, x = coreml_update_state_74)[name = string("model_model_kv_cache_0_internal_tensor_assign_20_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_20_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_75_write_state")]; tensor coreml_update_state_75 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_75")]; tensor var_6417_begin_0 = const()[name = string("op_6417_begin_0"), val = tensor([9, 0, 0, 0])]; tensor var_6417_end_0 = const()[name = string("op_6417_end_0"), val = tensor([10, 8, 1024, 128])]; tensor var_6417_end_mask_0 = const()[name = string("op_6417_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_6417_cast_fp16 = slice_by_index(begin = var_6417_begin_0, end = var_6417_end_0, end_mask = var_6417_end_mask_0, x = coreml_update_state_75)[name = string("op_6417_cast_fp16")]; tensor K_layer_cache_19_axes_0 = const()[name = string("K_layer_cache_19_axes_0"), val = tensor([0])]; tensor K_layer_cache_19_cast_fp16 = squeeze(axes = K_layer_cache_19_axes_0, x = var_6417_cast_fp16)[name = string("K_layer_cache_19_cast_fp16")]; tensor var_6424_begin_0 = const()[name = string("op_6424_begin_0"), val = tensor([37, 0, 0, 0])]; tensor var_6424_end_0 = const()[name = string("op_6424_end_0"), val = tensor([38, 8, 1024, 128])]; tensor var_6424_end_mask_0 = const()[name = string("op_6424_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_6424_cast_fp16 = slice_by_index(begin = var_6424_begin_0, end = var_6424_end_0, end_mask = var_6424_end_mask_0, x = coreml_update_state_75)[name = string("op_6424_cast_fp16")]; tensor V_layer_cache_19_axes_0 = const()[name = string("V_layer_cache_19_axes_0"), val = tensor([0])]; tensor V_layer_cache_19_cast_fp16 = squeeze(axes = V_layer_cache_19_axes_0, x = var_6424_cast_fp16)[name = string("V_layer_cache_19_cast_fp16")]; tensor x_187_axes_0 = const()[name = string("x_187_axes_0"), val = tensor([1])]; tensor x_187_cast_fp16 = expand_dims(axes = x_187_axes_0, x = K_layer_cache_19_cast_fp16)[name = string("x_187_cast_fp16")]; tensor var_6453 = const()[name = string("op_6453"), val = tensor([1, 2, 1, 1])]; tensor x_189_cast_fp16 = tile(reps = var_6453, x = x_187_cast_fp16)[name = string("x_189_cast_fp16")]; tensor var_6465 = const()[name = string("op_6465"), val = tensor([1, -1, 1024, 128])]; tensor key_states_97_cast_fp16 = reshape(shape = var_6465, x = x_189_cast_fp16)[name = string("key_states_97_cast_fp16")]; tensor x_193_axes_0 = const()[name = string("x_193_axes_0"), val = tensor([1])]; tensor x_193_cast_fp16 = expand_dims(axes = x_193_axes_0, x = V_layer_cache_19_cast_fp16)[name = string("x_193_cast_fp16")]; tensor var_6473 = const()[name = string("op_6473"), val = tensor([1, 2, 1, 1])]; tensor x_195_cast_fp16 = tile(reps = var_6473, x = x_193_cast_fp16)[name = string("x_195_cast_fp16")]; bool var_6500_transpose_x_0 = const()[name = string("op_6500_transpose_x_0"), val = bool(false)]; bool var_6500_transpose_y_0 = const()[name = string("op_6500_transpose_y_0"), val = bool(true)]; tensor var_6500 = matmul(transpose_x = var_6500_transpose_x_0, transpose_y = var_6500_transpose_y_0, x = query_states_75, y = key_states_97_cast_fp16)[name = string("op_6500")]; fp16 var_6501_to_fp16 = const()[name = string("op_6501_to_fp16"), val = fp16(0x1.6ap-4)]; tensor attn_weights_37_cast_fp16 = mul(x = var_6500, y = var_6501_to_fp16)[name = string("attn_weights_37_cast_fp16")]; tensor attn_weights_39_cast_fp16 = add(x = attn_weights_37_cast_fp16, y = causal_mask)[name = string("attn_weights_39_cast_fp16")]; int32 var_6536 = const()[name = string("op_6536"), val = int32(-1)]; tensor var_6538_cast_fp16 = softmax(axis = var_6536, x = attn_weights_39_cast_fp16)[name = string("op_6538_cast_fp16")]; tensor concat_174 = const()[name = string("concat_174"), val = tensor([16, 64, 1024])]; tensor reshape_27_cast_fp16 = reshape(shape = concat_174, x = var_6538_cast_fp16)[name = string("reshape_27_cast_fp16")]; tensor concat_175 = const()[name = string("concat_175"), val = tensor([16, 1024, 128])]; tensor reshape_28_cast_fp16 = reshape(shape = concat_175, x = x_195_cast_fp16)[name = string("reshape_28_cast_fp16")]; bool matmul_9_transpose_x_0 = const()[name = string("matmul_9_transpose_x_0"), val = bool(false)]; bool matmul_9_transpose_y_0 = const()[name = string("matmul_9_transpose_y_0"), val = bool(false)]; tensor matmul_9_cast_fp16 = matmul(transpose_x = matmul_9_transpose_x_0, transpose_y = matmul_9_transpose_y_0, x = reshape_27_cast_fp16, y = reshape_28_cast_fp16)[name = string("matmul_9_cast_fp16")]; tensor concat_179 = const()[name = string("concat_179"), val = tensor([1, 16, 64, 128])]; tensor reshape_29_cast_fp16 = reshape(shape = concat_179, x = matmul_9_cast_fp16)[name = string("reshape_29_cast_fp16")]; tensor var_6550_perm_0 = const()[name = string("op_6550_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_6569 = const()[name = string("op_6569"), val = tensor([1, 64, 2048])]; tensor var_6550_cast_fp16 = transpose(perm = var_6550_perm_0, x = reshape_29_cast_fp16)[name = string("transpose_166")]; tensor attn_output_95_cast_fp16 = reshape(shape = var_6569, x = var_6550_cast_fp16)[name = string("attn_output_95_cast_fp16")]; tensor var_6574 = const()[name = string("op_6574"), val = tensor([0, 2, 1])]; string var_6590_pad_type_0 = const()[name = string("op_6590_pad_type_0"), val = string("valid")]; int32 var_6590_groups_0 = const()[name = string("op_6590_groups_0"), val = int32(1)]; tensor var_6590_strides_0 = const()[name = string("op_6590_strides_0"), val = tensor([1])]; tensor var_6590_pad_0 = const()[name = string("op_6590_pad_0"), val = tensor([0, 0])]; tensor var_6590_dilations_0 = const()[name = string("op_6590_dilations_0"), val = tensor([1])]; tensor squeeze_9_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1039410944))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1042556736))))[name = string("squeeze_9_cast_fp16_to_fp32_to_fp16_palettized")]; tensor var_6575_cast_fp16 = transpose(perm = var_6574, x = attn_output_95_cast_fp16)[name = string("transpose_165")]; tensor var_6590_cast_fp16 = conv(dilations = var_6590_dilations_0, groups = var_6590_groups_0, pad = var_6590_pad_0, pad_type = var_6590_pad_type_0, strides = var_6590_strides_0, weight = squeeze_9_cast_fp16_to_fp32_to_fp16_palettized, x = var_6575_cast_fp16)[name = string("op_6590_cast_fp16")]; tensor var_6594 = const()[name = string("op_6594"), val = tensor([0, 2, 1])]; tensor attn_output_99_cast_fp16 = transpose(perm = var_6594, x = var_6590_cast_fp16)[name = string("transpose_164")]; tensor hidden_states_59_cast_fp16 = add(x = hidden_states_55_cast_fp16, y = attn_output_99_cast_fp16)[name = string("hidden_states_59_cast_fp16")]; tensor mean_79_axes_0 = const()[name = string("mean_79_axes_0"), val = tensor([-1])]; bool mean_79_keep_dims_0 = const()[name = string("mean_79_keep_dims_0"), val = bool(true)]; tensor mean_79_cast_fp16 = reduce_mean(axes = mean_79_axes_0, keep_dims = mean_79_keep_dims_0, x = hidden_states_59_cast_fp16)[name = string("mean_79_cast_fp16")]; tensor input_173_cast_fp16 = sub(x = hidden_states_59_cast_fp16, y = mean_79_cast_fp16)[name = string("input_173_cast_fp16")]; tensor var_6613_axes_0 = const()[name = string("op_6613_axes_0"), val = tensor([-1])]; tensor model_model_layers_9_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_9_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1042589568)))]; fp16 var_6601_to_fp16 = const()[name = string("op_6601_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_6613_cast_fp16 = layer_norm(axes = var_6613_axes_0, epsilon = var_6601_to_fp16, gamma = model_model_layers_9_post_attention_layernorm_weight_to_fp16, x = input_173_cast_fp16)[name = string("op_6613_cast_fp16")]; tensor var_6627 = const()[name = string("op_6627"), val = tensor([0, 2, 1])]; tensor input_175_axes_0 = const()[name = string("input_175_axes_0"), val = tensor([2])]; tensor var_6628 = transpose(perm = var_6627, x = var_6613_cast_fp16)[name = string("transpose_163")]; tensor input_175 = expand_dims(axes = input_175_axes_0, x = var_6628)[name = string("input_175")]; string input_177_pad_type_0 = const()[name = string("input_177_pad_type_0"), val = string("valid")]; tensor input_177_strides_0 = const()[name = string("input_177_strides_0"), val = tensor([1, 1])]; tensor input_177_pad_0 = const()[name = string("input_177_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_177_dilations_0 = const()[name = string("input_177_dilations_0"), val = tensor([1, 1])]; int32 input_177_groups_0 = const()[name = string("input_177_groups_0"), val = int32(1)]; tensor input_177 = conv(dilations = input_177_dilations_0, groups = input_177_groups_0, pad = input_177_pad_0, pad_type = input_177_pad_type_0, strides = input_177_strides_0, weight = model_model_layers_9_mlp_gate_proj_weight_palettized, x = input_175)[name = string("input_177")]; string b_19_pad_type_0 = const()[name = string("b_19_pad_type_0"), val = string("valid")]; tensor b_19_strides_0 = const()[name = string("b_19_strides_0"), val = tensor([1, 1])]; tensor b_19_pad_0 = const()[name = string("b_19_pad_0"), val = tensor([0, 0, 0, 0])]; tensor b_19_dilations_0 = const()[name = string("b_19_dilations_0"), val = tensor([1, 1])]; int32 b_19_groups_0 = const()[name = string("b_19_groups_0"), val = int32(1)]; tensor b_19 = conv(dilations = b_19_dilations_0, groups = b_19_groups_0, pad = b_19_pad_0, pad_type = b_19_pad_type_0, strides = b_19_strides_0, weight = model_model_layers_9_mlp_up_proj_weight_palettized, x = input_175)[name = string("b_19")]; tensor c_19 = silu(x = input_177)[name = string("c_19")]; tensor input_179 = mul(x = c_19, y = b_19)[name = string("input_179")]; string e_19_pad_type_0 = const()[name = string("e_19_pad_type_0"), val = string("valid")]; tensor e_19_strides_0 = const()[name = string("e_19_strides_0"), val = tensor([1, 1])]; tensor e_19_pad_0 = const()[name = string("e_19_pad_0"), val = tensor([0, 0, 0, 0])]; tensor e_19_dilations_0 = const()[name = string("e_19_dilations_0"), val = tensor([1, 1])]; int32 e_19_groups_0 = const()[name = string("e_19_groups_0"), val = int32(1)]; tensor e_19 = conv(dilations = e_19_dilations_0, groups = e_19_groups_0, pad = e_19_pad_0, pad_type = e_19_pad_type_0, strides = e_19_strides_0, weight = model_model_layers_9_mlp_down_proj_weight_palettized, x = input_179)[name = string("e_19")]; tensor var_6650_axes_0 = const()[name = string("op_6650_axes_0"), val = tensor([2])]; tensor var_6650 = squeeze(axes = var_6650_axes_0, x = e_19)[name = string("op_6650")]; tensor var_6651 = const()[name = string("op_6651"), val = tensor([0, 2, 1])]; tensor var_6652 = transpose(perm = var_6651, x = var_6650)[name = string("transpose_162")]; tensor hidden_states_61_cast_fp16 = add(x = hidden_states_59_cast_fp16, y = var_6652)[name = string("hidden_states_61_cast_fp16")]; tensor mean_81_axes_0 = const()[name = string("mean_81_axes_0"), val = tensor([-1])]; bool mean_81_keep_dims_0 = const()[name = string("mean_81_keep_dims_0"), val = bool(true)]; tensor mean_81_cast_fp16 = reduce_mean(axes = mean_81_axes_0, keep_dims = mean_81_keep_dims_0, x = hidden_states_61_cast_fp16)[name = string("mean_81_cast_fp16")]; tensor input_181_cast_fp16 = sub(x = hidden_states_61_cast_fp16, y = mean_81_cast_fp16)[name = string("input_181_cast_fp16")]; tensor var_6670_axes_0 = const()[name = string("op_6670_axes_0"), val = tensor([-1])]; tensor model_model_layers_10_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_10_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1042593728)))]; fp16 var_6658_to_fp16 = const()[name = string("op_6658_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_6670_cast_fp16 = layer_norm(axes = var_6670_axes_0, epsilon = var_6658_to_fp16, gamma = model_model_layers_10_input_layernorm_weight_to_fp16, x = input_181_cast_fp16)[name = string("op_6670_cast_fp16")]; tensor var_6682 = const()[name = string("op_6682"), val = tensor([0, 2, 1])]; tensor var_6685_axes_0 = const()[name = string("op_6685_axes_0"), val = tensor([2])]; tensor var_6683 = transpose(perm = var_6682, x = var_6670_cast_fp16)[name = string("transpose_161")]; tensor var_6685 = expand_dims(axes = var_6685_axes_0, x = var_6683)[name = string("op_6685")]; string query_states_81_pad_type_0 = const()[name = string("query_states_81_pad_type_0"), val = string("valid")]; tensor query_states_81_strides_0 = const()[name = string("query_states_81_strides_0"), val = tensor([1, 1])]; tensor query_states_81_pad_0 = const()[name = string("query_states_81_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_81_dilations_0 = const()[name = string("query_states_81_dilations_0"), val = tensor([1, 1])]; int32 query_states_81_groups_0 = const()[name = string("query_states_81_groups_0"), val = int32(1)]; tensor query_states_81 = conv(dilations = query_states_81_dilations_0, groups = query_states_81_groups_0, pad = query_states_81_pad_0, pad_type = query_states_81_pad_type_0, strides = query_states_81_strides_0, weight = model_model_layers_10_self_attn_q_proj_weight_palettized, x = var_6685)[name = string("query_states_81")]; string key_states_101_pad_type_0 = const()[name = string("key_states_101_pad_type_0"), val = string("valid")]; tensor key_states_101_strides_0 = const()[name = string("key_states_101_strides_0"), val = tensor([1, 1])]; tensor key_states_101_pad_0 = const()[name = string("key_states_101_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_101_dilations_0 = const()[name = string("key_states_101_dilations_0"), val = tensor([1, 1])]; int32 key_states_101_groups_0 = const()[name = string("key_states_101_groups_0"), val = int32(1)]; tensor key_states_101 = conv(dilations = key_states_101_dilations_0, groups = key_states_101_groups_0, pad = key_states_101_pad_0, pad_type = key_states_101_pad_type_0, strides = key_states_101_strides_0, weight = model_model_layers_10_self_attn_k_proj_weight_palettized, x = var_6685)[name = string("key_states_101")]; string value_states_81_pad_type_0 = const()[name = string("value_states_81_pad_type_0"), val = string("valid")]; tensor value_states_81_strides_0 = const()[name = string("value_states_81_strides_0"), val = tensor([1, 1])]; tensor value_states_81_pad_0 = const()[name = string("value_states_81_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_81_dilations_0 = const()[name = string("value_states_81_dilations_0"), val = tensor([1, 1])]; int32 value_states_81_groups_0 = const()[name = string("value_states_81_groups_0"), val = int32(1)]; tensor value_states_81 = conv(dilations = value_states_81_dilations_0, groups = value_states_81_groups_0, pad = value_states_81_pad_0, pad_type = value_states_81_pad_type_0, strides = value_states_81_strides_0, weight = model_model_layers_10_self_attn_v_proj_weight_palettized, x = var_6685)[name = string("value_states_81")]; tensor var_6727 = const()[name = string("op_6727"), val = tensor([1, 16, 128, 64])]; tensor var_6728 = reshape(shape = var_6727, x = query_states_81)[name = string("op_6728")]; tensor var_6733 = const()[name = string("op_6733"), val = tensor([0, 1, 3, 2])]; tensor var_6738 = const()[name = string("op_6738"), val = tensor([1, 8, 128, 64])]; tensor var_6739 = reshape(shape = var_6738, x = key_states_101)[name = string("op_6739")]; tensor var_6744 = const()[name = string("op_6744"), val = tensor([0, 1, 3, 2])]; tensor var_6749 = const()[name = string("op_6749"), val = tensor([1, 8, 128, 64])]; tensor var_6750 = reshape(shape = var_6749, x = value_states_81)[name = string("op_6750")]; tensor var_6755 = const()[name = string("op_6755"), val = tensor([0, 1, 3, 2])]; tensor mean_83_axes_0 = const()[name = string("mean_83_axes_0"), val = tensor([-1])]; bool mean_83_keep_dims_0 = const()[name = string("mean_83_keep_dims_0"), val = bool(true)]; tensor x_201 = transpose(perm = var_6733, x = var_6728)[name = string("transpose_160")]; tensor mean_83 = reduce_mean(axes = mean_83_axes_0, keep_dims = mean_83_keep_dims_0, x = x_201)[name = string("mean_83")]; tensor input_185 = sub(x = x_201, y = mean_83)[name = string("input_185")]; tensor var_6772_axes_0 = const()[name = string("op_6772_axes_0"), val = tensor([-1])]; tensor model_model_layers_10_self_attn_q_norm_weight_to_fp16 = const()[name = string("model_model_layers_10_self_attn_q_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1042597888)))]; fp16 var_6760_to_fp16 = const()[name = string("op_6760_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_6772_cast_fp16 = layer_norm(axes = var_6772_axes_0, epsilon = var_6760_to_fp16, gamma = model_model_layers_10_self_attn_q_norm_weight_to_fp16, x = input_185)[name = string("op_6772_cast_fp16")]; tensor mean_85_axes_0 = const()[name = string("mean_85_axes_0"), val = tensor([-1])]; bool mean_85_keep_dims_0 = const()[name = string("mean_85_keep_dims_0"), val = bool(true)]; tensor x_203 = transpose(perm = var_6744, x = var_6739)[name = string("transpose_159")]; tensor mean_85 = reduce_mean(axes = mean_85_axes_0, keep_dims = mean_85_keep_dims_0, x = x_203)[name = string("mean_85")]; tensor input_187 = sub(x = x_203, y = mean_85)[name = string("input_187")]; tensor var_6790_axes_0 = const()[name = string("op_6790_axes_0"), val = tensor([-1])]; tensor model_model_layers_10_self_attn_k_norm_weight_to_fp16 = const()[name = string("model_model_layers_10_self_attn_k_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1042598208)))]; fp16 var_6778_to_fp16 = const()[name = string("op_6778_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_6790_cast_fp16 = layer_norm(axes = var_6790_axes_0, epsilon = var_6778_to_fp16, gamma = model_model_layers_10_self_attn_k_norm_weight_to_fp16, x = input_187)[name = string("op_6790_cast_fp16")]; tensor var_6805 = mul(x = var_6772_cast_fp16, y = cos_5)[name = string("op_6805")]; tensor x1_41_begin_0 = const()[name = string("x1_41_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_41_end_0 = const()[name = string("x1_41_end_0"), val = tensor([1, 16, 64, 64])]; tensor x1_41_end_mask_0 = const()[name = string("x1_41_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_41 = slice_by_index(begin = x1_41_begin_0, end = x1_41_end_0, end_mask = x1_41_end_mask_0, x = var_6772_cast_fp16)[name = string("x1_41")]; tensor x2_41_begin_0 = const()[name = string("x2_41_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_41_end_0 = const()[name = string("x2_41_end_0"), val = tensor([1, 16, 64, 128])]; tensor x2_41_end_mask_0 = const()[name = string("x2_41_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_41 = slice_by_index(begin = x2_41_begin_0, end = x2_41_end_0, end_mask = x2_41_end_mask_0, x = var_6772_cast_fp16)[name = string("x2_41")]; fp16 const_227_promoted = const()[name = string("const_227_promoted"), val = fp16(-0x1p+0)]; tensor var_6826 = mul(x = x2_41, y = const_227_promoted)[name = string("op_6826")]; int32 var_6828 = const()[name = string("op_6828"), val = int32(-1)]; bool var_6829_interleave_0 = const()[name = string("op_6829_interleave_0"), val = bool(false)]; tensor var_6829 = concat(axis = var_6828, interleave = var_6829_interleave_0, values = (var_6826, x1_41))[name = string("op_6829")]; tensor var_6830 = mul(x = var_6829, y = sin_5)[name = string("op_6830")]; tensor query_states_83 = add(x = var_6805, y = var_6830)[name = string("query_states_83")]; tensor var_6833 = mul(x = var_6790_cast_fp16, y = cos_5)[name = string("op_6833")]; tensor x1_43_begin_0 = const()[name = string("x1_43_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_43_end_0 = const()[name = string("x1_43_end_0"), val = tensor([1, 8, 64, 64])]; tensor x1_43_end_mask_0 = const()[name = string("x1_43_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_43 = slice_by_index(begin = x1_43_begin_0, end = x1_43_end_0, end_mask = x1_43_end_mask_0, x = var_6790_cast_fp16)[name = string("x1_43")]; tensor x2_43_begin_0 = const()[name = string("x2_43_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_43_end_0 = const()[name = string("x2_43_end_0"), val = tensor([1, 8, 64, 128])]; tensor x2_43_end_mask_0 = const()[name = string("x2_43_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_43 = slice_by_index(begin = x2_43_begin_0, end = x2_43_end_0, end_mask = x2_43_end_mask_0, x = var_6790_cast_fp16)[name = string("x2_43")]; fp16 const_230_promoted = const()[name = string("const_230_promoted"), val = fp16(-0x1p+0)]; tensor var_6854 = mul(x = x2_43, y = const_230_promoted)[name = string("op_6854")]; int32 var_6856 = const()[name = string("op_6856"), val = int32(-1)]; bool var_6857_interleave_0 = const()[name = string("op_6857_interleave_0"), val = bool(false)]; tensor var_6857 = concat(axis = var_6856, interleave = var_6857_interleave_0, values = (var_6854, x1_43))[name = string("op_6857")]; tensor var_6858 = mul(x = var_6857, y = sin_5)[name = string("op_6858")]; tensor key_states_103 = add(x = var_6833, y = var_6858)[name = string("key_states_103")]; tensor expand_dims_120 = const()[name = string("expand_dims_120"), val = tensor([10])]; tensor expand_dims_121 = const()[name = string("expand_dims_121"), val = tensor([0])]; tensor expand_dims_123 = const()[name = string("expand_dims_123"), val = tensor([0])]; tensor expand_dims_124 = const()[name = string("expand_dims_124"), val = tensor([11])]; int32 concat_182_axis_0 = const()[name = string("concat_182_axis_0"), val = int32(0)]; bool concat_182_interleave_0 = const()[name = string("concat_182_interleave_0"), val = bool(false)]; tensor concat_182 = concat(axis = concat_182_axis_0, interleave = concat_182_interleave_0, values = (expand_dims_120, expand_dims_121, current_pos, expand_dims_123))[name = string("concat_182")]; tensor concat_183_values1_0 = const()[name = string("concat_183_values1_0"), val = tensor([0])]; tensor concat_183_values3_0 = const()[name = string("concat_183_values3_0"), val = tensor([0])]; int32 concat_183_axis_0 = const()[name = string("concat_183_axis_0"), val = int32(0)]; bool concat_183_interleave_0 = const()[name = string("concat_183_interleave_0"), val = bool(false)]; tensor concat_183 = concat(axis = concat_183_axis_0, interleave = concat_183_interleave_0, values = (expand_dims_124, concat_183_values1_0, var_1760, concat_183_values3_0))[name = string("concat_183")]; tensor model_model_kv_cache_0_internal_tensor_assign_21_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_21_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_21_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_21_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_21_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_21_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_21_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_21_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_21_cast_fp16 = slice_update(begin = concat_182, begin_mask = model_model_kv_cache_0_internal_tensor_assign_21_begin_mask_0, end = concat_183, end_mask = model_model_kv_cache_0_internal_tensor_assign_21_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_21_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_21_stride_0, update = key_states_103, x = coreml_update_state_75)[name = string("model_model_kv_cache_0_internal_tensor_assign_21_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_21_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_76_write_state")]; tensor coreml_update_state_76 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_76")]; tensor expand_dims_126 = const()[name = string("expand_dims_126"), val = tensor([38])]; tensor expand_dims_127 = const()[name = string("expand_dims_127"), val = tensor([0])]; tensor expand_dims_129 = const()[name = string("expand_dims_129"), val = tensor([0])]; tensor expand_dims_130 = const()[name = string("expand_dims_130"), val = tensor([39])]; int32 concat_186_axis_0 = const()[name = string("concat_186_axis_0"), val = int32(0)]; bool concat_186_interleave_0 = const()[name = string("concat_186_interleave_0"), val = bool(false)]; tensor concat_186 = concat(axis = concat_186_axis_0, interleave = concat_186_interleave_0, values = (expand_dims_126, expand_dims_127, current_pos, expand_dims_129))[name = string("concat_186")]; tensor concat_187_values1_0 = const()[name = string("concat_187_values1_0"), val = tensor([0])]; tensor concat_187_values3_0 = const()[name = string("concat_187_values3_0"), val = tensor([0])]; int32 concat_187_axis_0 = const()[name = string("concat_187_axis_0"), val = int32(0)]; bool concat_187_interleave_0 = const()[name = string("concat_187_interleave_0"), val = bool(false)]; tensor concat_187 = concat(axis = concat_187_axis_0, interleave = concat_187_interleave_0, values = (expand_dims_130, concat_187_values1_0, var_1760, concat_187_values3_0))[name = string("concat_187")]; tensor model_model_kv_cache_0_internal_tensor_assign_22_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_22_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_22_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_22_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_22_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_22_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_22_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_22_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_83 = transpose(perm = var_6755, x = var_6750)[name = string("transpose_158")]; tensor model_model_kv_cache_0_internal_tensor_assign_22_cast_fp16 = slice_update(begin = concat_186, begin_mask = model_model_kv_cache_0_internal_tensor_assign_22_begin_mask_0, end = concat_187, end_mask = model_model_kv_cache_0_internal_tensor_assign_22_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_22_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_22_stride_0, update = value_states_83, x = coreml_update_state_76)[name = string("model_model_kv_cache_0_internal_tensor_assign_22_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_22_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_77_write_state")]; tensor coreml_update_state_77 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_77")]; tensor var_6929_begin_0 = const()[name = string("op_6929_begin_0"), val = tensor([10, 0, 0, 0])]; tensor var_6929_end_0 = const()[name = string("op_6929_end_0"), val = tensor([11, 8, 1024, 128])]; tensor var_6929_end_mask_0 = const()[name = string("op_6929_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_6929_cast_fp16 = slice_by_index(begin = var_6929_begin_0, end = var_6929_end_0, end_mask = var_6929_end_mask_0, x = coreml_update_state_77)[name = string("op_6929_cast_fp16")]; tensor K_layer_cache_21_axes_0 = const()[name = string("K_layer_cache_21_axes_0"), val = tensor([0])]; tensor K_layer_cache_21_cast_fp16 = squeeze(axes = K_layer_cache_21_axes_0, x = var_6929_cast_fp16)[name = string("K_layer_cache_21_cast_fp16")]; tensor var_6936_begin_0 = const()[name = string("op_6936_begin_0"), val = tensor([38, 0, 0, 0])]; tensor var_6936_end_0 = const()[name = string("op_6936_end_0"), val = tensor([39, 8, 1024, 128])]; tensor var_6936_end_mask_0 = const()[name = string("op_6936_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_6936_cast_fp16 = slice_by_index(begin = var_6936_begin_0, end = var_6936_end_0, end_mask = var_6936_end_mask_0, x = coreml_update_state_77)[name = string("op_6936_cast_fp16")]; tensor V_layer_cache_21_axes_0 = const()[name = string("V_layer_cache_21_axes_0"), val = tensor([0])]; tensor V_layer_cache_21_cast_fp16 = squeeze(axes = V_layer_cache_21_axes_0, x = var_6936_cast_fp16)[name = string("V_layer_cache_21_cast_fp16")]; tensor x_207_axes_0 = const()[name = string("x_207_axes_0"), val = tensor([1])]; tensor x_207_cast_fp16 = expand_dims(axes = x_207_axes_0, x = K_layer_cache_21_cast_fp16)[name = string("x_207_cast_fp16")]; tensor var_6965 = const()[name = string("op_6965"), val = tensor([1, 2, 1, 1])]; tensor x_209_cast_fp16 = tile(reps = var_6965, x = x_207_cast_fp16)[name = string("x_209_cast_fp16")]; tensor var_6977 = const()[name = string("op_6977"), val = tensor([1, -1, 1024, 128])]; tensor key_states_107_cast_fp16 = reshape(shape = var_6977, x = x_209_cast_fp16)[name = string("key_states_107_cast_fp16")]; tensor x_213_axes_0 = const()[name = string("x_213_axes_0"), val = tensor([1])]; tensor x_213_cast_fp16 = expand_dims(axes = x_213_axes_0, x = V_layer_cache_21_cast_fp16)[name = string("x_213_cast_fp16")]; tensor var_6985 = const()[name = string("op_6985"), val = tensor([1, 2, 1, 1])]; tensor x_215_cast_fp16 = tile(reps = var_6985, x = x_213_cast_fp16)[name = string("x_215_cast_fp16")]; bool var_7012_transpose_x_0 = const()[name = string("op_7012_transpose_x_0"), val = bool(false)]; bool var_7012_transpose_y_0 = const()[name = string("op_7012_transpose_y_0"), val = bool(true)]; tensor var_7012 = matmul(transpose_x = var_7012_transpose_x_0, transpose_y = var_7012_transpose_y_0, x = query_states_83, y = key_states_107_cast_fp16)[name = string("op_7012")]; fp16 var_7013_to_fp16 = const()[name = string("op_7013_to_fp16"), val = fp16(0x1.6ap-4)]; tensor attn_weights_41_cast_fp16 = mul(x = var_7012, y = var_7013_to_fp16)[name = string("attn_weights_41_cast_fp16")]; tensor attn_weights_43_cast_fp16 = add(x = attn_weights_41_cast_fp16, y = causal_mask)[name = string("attn_weights_43_cast_fp16")]; int32 var_7048 = const()[name = string("op_7048"), val = int32(-1)]; tensor var_7050_cast_fp16 = softmax(axis = var_7048, x = attn_weights_43_cast_fp16)[name = string("op_7050_cast_fp16")]; tensor concat_192 = const()[name = string("concat_192"), val = tensor([16, 64, 1024])]; tensor reshape_30_cast_fp16 = reshape(shape = concat_192, x = var_7050_cast_fp16)[name = string("reshape_30_cast_fp16")]; tensor concat_193 = const()[name = string("concat_193"), val = tensor([16, 1024, 128])]; tensor reshape_31_cast_fp16 = reshape(shape = concat_193, x = x_215_cast_fp16)[name = string("reshape_31_cast_fp16")]; bool matmul_10_transpose_x_0 = const()[name = string("matmul_10_transpose_x_0"), val = bool(false)]; bool matmul_10_transpose_y_0 = const()[name = string("matmul_10_transpose_y_0"), val = bool(false)]; tensor matmul_10_cast_fp16 = matmul(transpose_x = matmul_10_transpose_x_0, transpose_y = matmul_10_transpose_y_0, x = reshape_30_cast_fp16, y = reshape_31_cast_fp16)[name = string("matmul_10_cast_fp16")]; tensor concat_197 = const()[name = string("concat_197"), val = tensor([1, 16, 64, 128])]; tensor reshape_32_cast_fp16 = reshape(shape = concat_197, x = matmul_10_cast_fp16)[name = string("reshape_32_cast_fp16")]; tensor var_7062_perm_0 = const()[name = string("op_7062_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_7081 = const()[name = string("op_7081"), val = tensor([1, 64, 2048])]; tensor var_7062_cast_fp16 = transpose(perm = var_7062_perm_0, x = reshape_32_cast_fp16)[name = string("transpose_157")]; tensor attn_output_105_cast_fp16 = reshape(shape = var_7081, x = var_7062_cast_fp16)[name = string("attn_output_105_cast_fp16")]; tensor var_7086 = const()[name = string("op_7086"), val = tensor([0, 2, 1])]; string var_7102_pad_type_0 = const()[name = string("op_7102_pad_type_0"), val = string("valid")]; int32 var_7102_groups_0 = const()[name = string("op_7102_groups_0"), val = int32(1)]; tensor var_7102_strides_0 = const()[name = string("op_7102_strides_0"), val = tensor([1])]; tensor var_7102_pad_0 = const()[name = string("op_7102_pad_0"), val = tensor([0, 0])]; tensor var_7102_dilations_0 = const()[name = string("op_7102_dilations_0"), val = tensor([1])]; tensor squeeze_10_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1042598528))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1045744320))))[name = string("squeeze_10_cast_fp16_to_fp32_to_fp16_palettized")]; tensor var_7087_cast_fp16 = transpose(perm = var_7086, x = attn_output_105_cast_fp16)[name = string("transpose_156")]; tensor var_7102_cast_fp16 = conv(dilations = var_7102_dilations_0, groups = var_7102_groups_0, pad = var_7102_pad_0, pad_type = var_7102_pad_type_0, strides = var_7102_strides_0, weight = squeeze_10_cast_fp16_to_fp32_to_fp16_palettized, x = var_7087_cast_fp16)[name = string("op_7102_cast_fp16")]; tensor var_7106 = const()[name = string("op_7106"), val = tensor([0, 2, 1])]; tensor attn_output_109_cast_fp16 = transpose(perm = var_7106, x = var_7102_cast_fp16)[name = string("transpose_155")]; tensor hidden_states_65_cast_fp16 = add(x = hidden_states_61_cast_fp16, y = attn_output_109_cast_fp16)[name = string("hidden_states_65_cast_fp16")]; tensor mean_87_axes_0 = const()[name = string("mean_87_axes_0"), val = tensor([-1])]; bool mean_87_keep_dims_0 = const()[name = string("mean_87_keep_dims_0"), val = bool(true)]; tensor mean_87_cast_fp16 = reduce_mean(axes = mean_87_axes_0, keep_dims = mean_87_keep_dims_0, x = hidden_states_65_cast_fp16)[name = string("mean_87_cast_fp16")]; tensor input_191_cast_fp16 = sub(x = hidden_states_65_cast_fp16, y = mean_87_cast_fp16)[name = string("input_191_cast_fp16")]; tensor var_7125_axes_0 = const()[name = string("op_7125_axes_0"), val = tensor([-1])]; tensor model_model_layers_10_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_10_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1045777152)))]; fp16 var_7113_to_fp16 = const()[name = string("op_7113_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_7125_cast_fp16 = layer_norm(axes = var_7125_axes_0, epsilon = var_7113_to_fp16, gamma = model_model_layers_10_post_attention_layernorm_weight_to_fp16, x = input_191_cast_fp16)[name = string("op_7125_cast_fp16")]; tensor var_7139 = const()[name = string("op_7139"), val = tensor([0, 2, 1])]; tensor input_193_axes_0 = const()[name = string("input_193_axes_0"), val = tensor([2])]; tensor var_7140 = transpose(perm = var_7139, x = var_7125_cast_fp16)[name = string("transpose_154")]; tensor input_193 = expand_dims(axes = input_193_axes_0, x = var_7140)[name = string("input_193")]; string input_195_pad_type_0 = const()[name = string("input_195_pad_type_0"), val = string("valid")]; tensor input_195_strides_0 = const()[name = string("input_195_strides_0"), val = tensor([1, 1])]; tensor input_195_pad_0 = const()[name = string("input_195_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_195_dilations_0 = const()[name = string("input_195_dilations_0"), val = tensor([1, 1])]; int32 input_195_groups_0 = const()[name = string("input_195_groups_0"), val = int32(1)]; tensor input_195 = conv(dilations = input_195_dilations_0, groups = input_195_groups_0, pad = input_195_pad_0, pad_type = input_195_pad_type_0, strides = input_195_strides_0, weight = model_model_layers_10_mlp_gate_proj_weight_palettized, x = input_193)[name = string("input_195")]; string b_21_pad_type_0 = const()[name = string("b_21_pad_type_0"), val = string("valid")]; tensor b_21_strides_0 = const()[name = string("b_21_strides_0"), val = tensor([1, 1])]; tensor b_21_pad_0 = const()[name = string("b_21_pad_0"), val = tensor([0, 0, 0, 0])]; tensor b_21_dilations_0 = const()[name = string("b_21_dilations_0"), val = tensor([1, 1])]; int32 b_21_groups_0 = const()[name = string("b_21_groups_0"), val = int32(1)]; tensor b_21 = conv(dilations = b_21_dilations_0, groups = b_21_groups_0, pad = b_21_pad_0, pad_type = b_21_pad_type_0, strides = b_21_strides_0, weight = model_model_layers_10_mlp_up_proj_weight_palettized, x = input_193)[name = string("b_21")]; tensor c_21 = silu(x = input_195)[name = string("c_21")]; tensor input_197 = mul(x = c_21, y = b_21)[name = string("input_197")]; string e_21_pad_type_0 = const()[name = string("e_21_pad_type_0"), val = string("valid")]; tensor e_21_strides_0 = const()[name = string("e_21_strides_0"), val = tensor([1, 1])]; tensor e_21_pad_0 = const()[name = string("e_21_pad_0"), val = tensor([0, 0, 0, 0])]; tensor e_21_dilations_0 = const()[name = string("e_21_dilations_0"), val = tensor([1, 1])]; int32 e_21_groups_0 = const()[name = string("e_21_groups_0"), val = int32(1)]; tensor e_21 = conv(dilations = e_21_dilations_0, groups = e_21_groups_0, pad = e_21_pad_0, pad_type = e_21_pad_type_0, strides = e_21_strides_0, weight = model_model_layers_10_mlp_down_proj_weight_palettized, x = input_197)[name = string("e_21")]; tensor var_7162_axes_0 = const()[name = string("op_7162_axes_0"), val = tensor([2])]; tensor var_7162 = squeeze(axes = var_7162_axes_0, x = e_21)[name = string("op_7162")]; tensor var_7163 = const()[name = string("op_7163"), val = tensor([0, 2, 1])]; tensor var_7164 = transpose(perm = var_7163, x = var_7162)[name = string("transpose_153")]; tensor hidden_states_67_cast_fp16 = add(x = hidden_states_65_cast_fp16, y = var_7164)[name = string("hidden_states_67_cast_fp16")]; tensor mean_89_axes_0 = const()[name = string("mean_89_axes_0"), val = tensor([-1])]; bool mean_89_keep_dims_0 = const()[name = string("mean_89_keep_dims_0"), val = bool(true)]; tensor mean_89_cast_fp16 = reduce_mean(axes = mean_89_axes_0, keep_dims = mean_89_keep_dims_0, x = hidden_states_67_cast_fp16)[name = string("mean_89_cast_fp16")]; tensor input_199_cast_fp16 = sub(x = hidden_states_67_cast_fp16, y = mean_89_cast_fp16)[name = string("input_199_cast_fp16")]; tensor var_7182_axes_0 = const()[name = string("op_7182_axes_0"), val = tensor([-1])]; tensor model_model_layers_11_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_11_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1045781312)))]; fp16 var_7170_to_fp16 = const()[name = string("op_7170_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_7182_cast_fp16 = layer_norm(axes = var_7182_axes_0, epsilon = var_7170_to_fp16, gamma = model_model_layers_11_input_layernorm_weight_to_fp16, x = input_199_cast_fp16)[name = string("op_7182_cast_fp16")]; tensor var_7194 = const()[name = string("op_7194"), val = tensor([0, 2, 1])]; tensor var_7197_axes_0 = const()[name = string("op_7197_axes_0"), val = tensor([2])]; tensor var_7195 = transpose(perm = var_7194, x = var_7182_cast_fp16)[name = string("transpose_152")]; tensor var_7197 = expand_dims(axes = var_7197_axes_0, x = var_7195)[name = string("op_7197")]; string query_states_89_pad_type_0 = const()[name = string("query_states_89_pad_type_0"), val = string("valid")]; tensor query_states_89_strides_0 = const()[name = string("query_states_89_strides_0"), val = tensor([1, 1])]; tensor query_states_89_pad_0 = const()[name = string("query_states_89_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_89_dilations_0 = const()[name = string("query_states_89_dilations_0"), val = tensor([1, 1])]; int32 query_states_89_groups_0 = const()[name = string("query_states_89_groups_0"), val = int32(1)]; tensor query_states_89 = conv(dilations = query_states_89_dilations_0, groups = query_states_89_groups_0, pad = query_states_89_pad_0, pad_type = query_states_89_pad_type_0, strides = query_states_89_strides_0, weight = model_model_layers_11_self_attn_q_proj_weight_palettized, x = var_7197)[name = string("query_states_89")]; string key_states_111_pad_type_0 = const()[name = string("key_states_111_pad_type_0"), val = string("valid")]; tensor key_states_111_strides_0 = const()[name = string("key_states_111_strides_0"), val = tensor([1, 1])]; tensor key_states_111_pad_0 = const()[name = string("key_states_111_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_111_dilations_0 = const()[name = string("key_states_111_dilations_0"), val = tensor([1, 1])]; int32 key_states_111_groups_0 = const()[name = string("key_states_111_groups_0"), val = int32(1)]; tensor key_states_111 = conv(dilations = key_states_111_dilations_0, groups = key_states_111_groups_0, pad = key_states_111_pad_0, pad_type = key_states_111_pad_type_0, strides = key_states_111_strides_0, weight = model_model_layers_11_self_attn_k_proj_weight_palettized, x = var_7197)[name = string("key_states_111")]; string value_states_89_pad_type_0 = const()[name = string("value_states_89_pad_type_0"), val = string("valid")]; tensor value_states_89_strides_0 = const()[name = string("value_states_89_strides_0"), val = tensor([1, 1])]; tensor value_states_89_pad_0 = const()[name = string("value_states_89_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_89_dilations_0 = const()[name = string("value_states_89_dilations_0"), val = tensor([1, 1])]; int32 value_states_89_groups_0 = const()[name = string("value_states_89_groups_0"), val = int32(1)]; tensor value_states_89 = conv(dilations = value_states_89_dilations_0, groups = value_states_89_groups_0, pad = value_states_89_pad_0, pad_type = value_states_89_pad_type_0, strides = value_states_89_strides_0, weight = model_model_layers_11_self_attn_v_proj_weight_palettized, x = var_7197)[name = string("value_states_89")]; tensor var_7239 = const()[name = string("op_7239"), val = tensor([1, 16, 128, 64])]; tensor var_7240 = reshape(shape = var_7239, x = query_states_89)[name = string("op_7240")]; tensor var_7245 = const()[name = string("op_7245"), val = tensor([0, 1, 3, 2])]; tensor var_7250 = const()[name = string("op_7250"), val = tensor([1, 8, 128, 64])]; tensor var_7251 = reshape(shape = var_7250, x = key_states_111)[name = string("op_7251")]; tensor var_7256 = const()[name = string("op_7256"), val = tensor([0, 1, 3, 2])]; tensor var_7261 = const()[name = string("op_7261"), val = tensor([1, 8, 128, 64])]; tensor var_7262 = reshape(shape = var_7261, x = value_states_89)[name = string("op_7262")]; tensor var_7267 = const()[name = string("op_7267"), val = tensor([0, 1, 3, 2])]; tensor mean_91_axes_0 = const()[name = string("mean_91_axes_0"), val = tensor([-1])]; bool mean_91_keep_dims_0 = const()[name = string("mean_91_keep_dims_0"), val = bool(true)]; tensor x_221 = transpose(perm = var_7245, x = var_7240)[name = string("transpose_151")]; tensor mean_91 = reduce_mean(axes = mean_91_axes_0, keep_dims = mean_91_keep_dims_0, x = x_221)[name = string("mean_91")]; tensor input_203 = sub(x = x_221, y = mean_91)[name = string("input_203")]; tensor var_7284_axes_0 = const()[name = string("op_7284_axes_0"), val = tensor([-1])]; tensor model_model_layers_11_self_attn_q_norm_weight_to_fp16 = const()[name = string("model_model_layers_11_self_attn_q_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1045785472)))]; fp16 var_7272_to_fp16 = const()[name = string("op_7272_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_7284_cast_fp16 = layer_norm(axes = var_7284_axes_0, epsilon = var_7272_to_fp16, gamma = model_model_layers_11_self_attn_q_norm_weight_to_fp16, x = input_203)[name = string("op_7284_cast_fp16")]; tensor mean_93_axes_0 = const()[name = string("mean_93_axes_0"), val = tensor([-1])]; bool mean_93_keep_dims_0 = const()[name = string("mean_93_keep_dims_0"), val = bool(true)]; tensor x_223 = transpose(perm = var_7256, x = var_7251)[name = string("transpose_150")]; tensor mean_93 = reduce_mean(axes = mean_93_axes_0, keep_dims = mean_93_keep_dims_0, x = x_223)[name = string("mean_93")]; tensor input_205 = sub(x = x_223, y = mean_93)[name = string("input_205")]; tensor var_7302_axes_0 = const()[name = string("op_7302_axes_0"), val = tensor([-1])]; tensor model_model_layers_11_self_attn_k_norm_weight_to_fp16 = const()[name = string("model_model_layers_11_self_attn_k_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1045785792)))]; fp16 var_7290_to_fp16 = const()[name = string("op_7290_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_7302_cast_fp16 = layer_norm(axes = var_7302_axes_0, epsilon = var_7290_to_fp16, gamma = model_model_layers_11_self_attn_k_norm_weight_to_fp16, x = input_205)[name = string("op_7302_cast_fp16")]; tensor var_7317 = mul(x = var_7284_cast_fp16, y = cos_5)[name = string("op_7317")]; tensor x1_45_begin_0 = const()[name = string("x1_45_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_45_end_0 = const()[name = string("x1_45_end_0"), val = tensor([1, 16, 64, 64])]; tensor x1_45_end_mask_0 = const()[name = string("x1_45_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_45 = slice_by_index(begin = x1_45_begin_0, end = x1_45_end_0, end_mask = x1_45_end_mask_0, x = var_7284_cast_fp16)[name = string("x1_45")]; tensor x2_45_begin_0 = const()[name = string("x2_45_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_45_end_0 = const()[name = string("x2_45_end_0"), val = tensor([1, 16, 64, 128])]; tensor x2_45_end_mask_0 = const()[name = string("x2_45_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_45 = slice_by_index(begin = x2_45_begin_0, end = x2_45_end_0, end_mask = x2_45_end_mask_0, x = var_7284_cast_fp16)[name = string("x2_45")]; fp16 const_249_promoted = const()[name = string("const_249_promoted"), val = fp16(-0x1p+0)]; tensor var_7338 = mul(x = x2_45, y = const_249_promoted)[name = string("op_7338")]; int32 var_7340 = const()[name = string("op_7340"), val = int32(-1)]; bool var_7341_interleave_0 = const()[name = string("op_7341_interleave_0"), val = bool(false)]; tensor var_7341 = concat(axis = var_7340, interleave = var_7341_interleave_0, values = (var_7338, x1_45))[name = string("op_7341")]; tensor var_7342 = mul(x = var_7341, y = sin_5)[name = string("op_7342")]; tensor query_states_91 = add(x = var_7317, y = var_7342)[name = string("query_states_91")]; tensor var_7345 = mul(x = var_7302_cast_fp16, y = cos_5)[name = string("op_7345")]; tensor x1_47_begin_0 = const()[name = string("x1_47_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_47_end_0 = const()[name = string("x1_47_end_0"), val = tensor([1, 8, 64, 64])]; tensor x1_47_end_mask_0 = const()[name = string("x1_47_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_47 = slice_by_index(begin = x1_47_begin_0, end = x1_47_end_0, end_mask = x1_47_end_mask_0, x = var_7302_cast_fp16)[name = string("x1_47")]; tensor x2_47_begin_0 = const()[name = string("x2_47_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_47_end_0 = const()[name = string("x2_47_end_0"), val = tensor([1, 8, 64, 128])]; tensor x2_47_end_mask_0 = const()[name = string("x2_47_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_47 = slice_by_index(begin = x2_47_begin_0, end = x2_47_end_0, end_mask = x2_47_end_mask_0, x = var_7302_cast_fp16)[name = string("x2_47")]; fp16 const_252_promoted = const()[name = string("const_252_promoted"), val = fp16(-0x1p+0)]; tensor var_7366 = mul(x = x2_47, y = const_252_promoted)[name = string("op_7366")]; int32 var_7368 = const()[name = string("op_7368"), val = int32(-1)]; bool var_7369_interleave_0 = const()[name = string("op_7369_interleave_0"), val = bool(false)]; tensor var_7369 = concat(axis = var_7368, interleave = var_7369_interleave_0, values = (var_7366, x1_47))[name = string("op_7369")]; tensor var_7370 = mul(x = var_7369, y = sin_5)[name = string("op_7370")]; tensor key_states_113 = add(x = var_7345, y = var_7370)[name = string("key_states_113")]; tensor expand_dims_132 = const()[name = string("expand_dims_132"), val = tensor([11])]; tensor expand_dims_133 = const()[name = string("expand_dims_133"), val = tensor([0])]; tensor expand_dims_135 = const()[name = string("expand_dims_135"), val = tensor([0])]; tensor expand_dims_136 = const()[name = string("expand_dims_136"), val = tensor([12])]; int32 concat_200_axis_0 = const()[name = string("concat_200_axis_0"), val = int32(0)]; bool concat_200_interleave_0 = const()[name = string("concat_200_interleave_0"), val = bool(false)]; tensor concat_200 = concat(axis = concat_200_axis_0, interleave = concat_200_interleave_0, values = (expand_dims_132, expand_dims_133, current_pos, expand_dims_135))[name = string("concat_200")]; tensor concat_201_values1_0 = const()[name = string("concat_201_values1_0"), val = tensor([0])]; tensor concat_201_values3_0 = const()[name = string("concat_201_values3_0"), val = tensor([0])]; int32 concat_201_axis_0 = const()[name = string("concat_201_axis_0"), val = int32(0)]; bool concat_201_interleave_0 = const()[name = string("concat_201_interleave_0"), val = bool(false)]; tensor concat_201 = concat(axis = concat_201_axis_0, interleave = concat_201_interleave_0, values = (expand_dims_136, concat_201_values1_0, var_1760, concat_201_values3_0))[name = string("concat_201")]; tensor model_model_kv_cache_0_internal_tensor_assign_23_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_23_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_23_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_23_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_23_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_23_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_23_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_23_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_23_cast_fp16 = slice_update(begin = concat_200, begin_mask = model_model_kv_cache_0_internal_tensor_assign_23_begin_mask_0, end = concat_201, end_mask = model_model_kv_cache_0_internal_tensor_assign_23_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_23_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_23_stride_0, update = key_states_113, x = coreml_update_state_77)[name = string("model_model_kv_cache_0_internal_tensor_assign_23_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_23_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_78_write_state")]; tensor coreml_update_state_78 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_78")]; tensor expand_dims_138 = const()[name = string("expand_dims_138"), val = tensor([39])]; tensor expand_dims_139 = const()[name = string("expand_dims_139"), val = tensor([0])]; tensor expand_dims_141 = const()[name = string("expand_dims_141"), val = tensor([0])]; tensor expand_dims_142 = const()[name = string("expand_dims_142"), val = tensor([40])]; int32 concat_204_axis_0 = const()[name = string("concat_204_axis_0"), val = int32(0)]; bool concat_204_interleave_0 = const()[name = string("concat_204_interleave_0"), val = bool(false)]; tensor concat_204 = concat(axis = concat_204_axis_0, interleave = concat_204_interleave_0, values = (expand_dims_138, expand_dims_139, current_pos, expand_dims_141))[name = string("concat_204")]; tensor concat_205_values1_0 = const()[name = string("concat_205_values1_0"), val = tensor([0])]; tensor concat_205_values3_0 = const()[name = string("concat_205_values3_0"), val = tensor([0])]; int32 concat_205_axis_0 = const()[name = string("concat_205_axis_0"), val = int32(0)]; bool concat_205_interleave_0 = const()[name = string("concat_205_interleave_0"), val = bool(false)]; tensor concat_205 = concat(axis = concat_205_axis_0, interleave = concat_205_interleave_0, values = (expand_dims_142, concat_205_values1_0, var_1760, concat_205_values3_0))[name = string("concat_205")]; tensor model_model_kv_cache_0_internal_tensor_assign_24_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_24_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_24_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_24_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_24_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_24_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_24_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_24_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_91 = transpose(perm = var_7267, x = var_7262)[name = string("transpose_149")]; tensor model_model_kv_cache_0_internal_tensor_assign_24_cast_fp16 = slice_update(begin = concat_204, begin_mask = model_model_kv_cache_0_internal_tensor_assign_24_begin_mask_0, end = concat_205, end_mask = model_model_kv_cache_0_internal_tensor_assign_24_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_24_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_24_stride_0, update = value_states_91, x = coreml_update_state_78)[name = string("model_model_kv_cache_0_internal_tensor_assign_24_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_24_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_79_write_state")]; tensor coreml_update_state_79 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_79")]; tensor var_7441_begin_0 = const()[name = string("op_7441_begin_0"), val = tensor([11, 0, 0, 0])]; tensor var_7441_end_0 = const()[name = string("op_7441_end_0"), val = tensor([12, 8, 1024, 128])]; tensor var_7441_end_mask_0 = const()[name = string("op_7441_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_7441_cast_fp16 = slice_by_index(begin = var_7441_begin_0, end = var_7441_end_0, end_mask = var_7441_end_mask_0, x = coreml_update_state_79)[name = string("op_7441_cast_fp16")]; tensor K_layer_cache_23_axes_0 = const()[name = string("K_layer_cache_23_axes_0"), val = tensor([0])]; tensor K_layer_cache_23_cast_fp16 = squeeze(axes = K_layer_cache_23_axes_0, x = var_7441_cast_fp16)[name = string("K_layer_cache_23_cast_fp16")]; tensor var_7448_begin_0 = const()[name = string("op_7448_begin_0"), val = tensor([39, 0, 0, 0])]; tensor var_7448_end_0 = const()[name = string("op_7448_end_0"), val = tensor([40, 8, 1024, 128])]; tensor var_7448_end_mask_0 = const()[name = string("op_7448_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_7448_cast_fp16 = slice_by_index(begin = var_7448_begin_0, end = var_7448_end_0, end_mask = var_7448_end_mask_0, x = coreml_update_state_79)[name = string("op_7448_cast_fp16")]; tensor V_layer_cache_23_axes_0 = const()[name = string("V_layer_cache_23_axes_0"), val = tensor([0])]; tensor V_layer_cache_23_cast_fp16 = squeeze(axes = V_layer_cache_23_axes_0, x = var_7448_cast_fp16)[name = string("V_layer_cache_23_cast_fp16")]; tensor x_227_axes_0 = const()[name = string("x_227_axes_0"), val = tensor([1])]; tensor x_227_cast_fp16 = expand_dims(axes = x_227_axes_0, x = K_layer_cache_23_cast_fp16)[name = string("x_227_cast_fp16")]; tensor var_7477 = const()[name = string("op_7477"), val = tensor([1, 2, 1, 1])]; tensor x_229_cast_fp16 = tile(reps = var_7477, x = x_227_cast_fp16)[name = string("x_229_cast_fp16")]; tensor var_7489 = const()[name = string("op_7489"), val = tensor([1, -1, 1024, 128])]; tensor key_states_117_cast_fp16 = reshape(shape = var_7489, x = x_229_cast_fp16)[name = string("key_states_117_cast_fp16")]; tensor x_233_axes_0 = const()[name = string("x_233_axes_0"), val = tensor([1])]; tensor x_233_cast_fp16 = expand_dims(axes = x_233_axes_0, x = V_layer_cache_23_cast_fp16)[name = string("x_233_cast_fp16")]; tensor var_7497 = const()[name = string("op_7497"), val = tensor([1, 2, 1, 1])]; tensor x_235_cast_fp16 = tile(reps = var_7497, x = x_233_cast_fp16)[name = string("x_235_cast_fp16")]; bool var_7524_transpose_x_0 = const()[name = string("op_7524_transpose_x_0"), val = bool(false)]; bool var_7524_transpose_y_0 = const()[name = string("op_7524_transpose_y_0"), val = bool(true)]; tensor var_7524 = matmul(transpose_x = var_7524_transpose_x_0, transpose_y = var_7524_transpose_y_0, x = query_states_91, y = key_states_117_cast_fp16)[name = string("op_7524")]; fp16 var_7525_to_fp16 = const()[name = string("op_7525_to_fp16"), val = fp16(0x1.6ap-4)]; tensor attn_weights_45_cast_fp16 = mul(x = var_7524, y = var_7525_to_fp16)[name = string("attn_weights_45_cast_fp16")]; tensor attn_weights_47_cast_fp16 = add(x = attn_weights_45_cast_fp16, y = causal_mask)[name = string("attn_weights_47_cast_fp16")]; int32 var_7560 = const()[name = string("op_7560"), val = int32(-1)]; tensor var_7562_cast_fp16 = softmax(axis = var_7560, x = attn_weights_47_cast_fp16)[name = string("op_7562_cast_fp16")]; tensor concat_210 = const()[name = string("concat_210"), val = tensor([16, 64, 1024])]; tensor reshape_33_cast_fp16 = reshape(shape = concat_210, x = var_7562_cast_fp16)[name = string("reshape_33_cast_fp16")]; tensor concat_211 = const()[name = string("concat_211"), val = tensor([16, 1024, 128])]; tensor reshape_34_cast_fp16 = reshape(shape = concat_211, x = x_235_cast_fp16)[name = string("reshape_34_cast_fp16")]; bool matmul_11_transpose_x_0 = const()[name = string("matmul_11_transpose_x_0"), val = bool(false)]; bool matmul_11_transpose_y_0 = const()[name = string("matmul_11_transpose_y_0"), val = bool(false)]; tensor matmul_11_cast_fp16 = matmul(transpose_x = matmul_11_transpose_x_0, transpose_y = matmul_11_transpose_y_0, x = reshape_33_cast_fp16, y = reshape_34_cast_fp16)[name = string("matmul_11_cast_fp16")]; tensor concat_215 = const()[name = string("concat_215"), val = tensor([1, 16, 64, 128])]; tensor reshape_35_cast_fp16 = reshape(shape = concat_215, x = matmul_11_cast_fp16)[name = string("reshape_35_cast_fp16")]; tensor var_7574_perm_0 = const()[name = string("op_7574_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_7593 = const()[name = string("op_7593"), val = tensor([1, 64, 2048])]; tensor var_7574_cast_fp16 = transpose(perm = var_7574_perm_0, x = reshape_35_cast_fp16)[name = string("transpose_148")]; tensor attn_output_115_cast_fp16 = reshape(shape = var_7593, x = var_7574_cast_fp16)[name = string("attn_output_115_cast_fp16")]; tensor var_7598 = const()[name = string("op_7598"), val = tensor([0, 2, 1])]; string var_7614_pad_type_0 = const()[name = string("op_7614_pad_type_0"), val = string("valid")]; int32 var_7614_groups_0 = const()[name = string("op_7614_groups_0"), val = int32(1)]; tensor var_7614_strides_0 = const()[name = string("op_7614_strides_0"), val = tensor([1])]; tensor var_7614_pad_0 = const()[name = string("op_7614_pad_0"), val = tensor([0, 0])]; tensor var_7614_dilations_0 = const()[name = string("op_7614_dilations_0"), val = tensor([1])]; tensor squeeze_11_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1045786112))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1048931904))))[name = string("squeeze_11_cast_fp16_to_fp32_to_fp16_palettized")]; tensor var_7599_cast_fp16 = transpose(perm = var_7598, x = attn_output_115_cast_fp16)[name = string("transpose_147")]; tensor var_7614_cast_fp16 = conv(dilations = var_7614_dilations_0, groups = var_7614_groups_0, pad = var_7614_pad_0, pad_type = var_7614_pad_type_0, strides = var_7614_strides_0, weight = squeeze_11_cast_fp16_to_fp32_to_fp16_palettized, x = var_7599_cast_fp16)[name = string("op_7614_cast_fp16")]; tensor var_7618 = const()[name = string("op_7618"), val = tensor([0, 2, 1])]; tensor attn_output_119_cast_fp16 = transpose(perm = var_7618, x = var_7614_cast_fp16)[name = string("transpose_146")]; tensor hidden_states_71_cast_fp16 = add(x = hidden_states_67_cast_fp16, y = attn_output_119_cast_fp16)[name = string("hidden_states_71_cast_fp16")]; tensor mean_95_axes_0 = const()[name = string("mean_95_axes_0"), val = tensor([-1])]; bool mean_95_keep_dims_0 = const()[name = string("mean_95_keep_dims_0"), val = bool(true)]; tensor mean_95_cast_fp16 = reduce_mean(axes = mean_95_axes_0, keep_dims = mean_95_keep_dims_0, x = hidden_states_71_cast_fp16)[name = string("mean_95_cast_fp16")]; tensor input_209_cast_fp16 = sub(x = hidden_states_71_cast_fp16, y = mean_95_cast_fp16)[name = string("input_209_cast_fp16")]; tensor var_7637_axes_0 = const()[name = string("op_7637_axes_0"), val = tensor([-1])]; tensor model_model_layers_11_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_11_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1048964736)))]; fp16 var_7625_to_fp16 = const()[name = string("op_7625_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_7637_cast_fp16 = layer_norm(axes = var_7637_axes_0, epsilon = var_7625_to_fp16, gamma = model_model_layers_11_post_attention_layernorm_weight_to_fp16, x = input_209_cast_fp16)[name = string("op_7637_cast_fp16")]; tensor var_7651 = const()[name = string("op_7651"), val = tensor([0, 2, 1])]; tensor input_211_axes_0 = const()[name = string("input_211_axes_0"), val = tensor([2])]; tensor var_7652 = transpose(perm = var_7651, x = var_7637_cast_fp16)[name = string("transpose_145")]; tensor input_211 = expand_dims(axes = input_211_axes_0, x = var_7652)[name = string("input_211")]; string input_213_pad_type_0 = const()[name = string("input_213_pad_type_0"), val = string("valid")]; tensor input_213_strides_0 = const()[name = string("input_213_strides_0"), val = tensor([1, 1])]; tensor input_213_pad_0 = const()[name = string("input_213_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_213_dilations_0 = const()[name = string("input_213_dilations_0"), val = tensor([1, 1])]; int32 input_213_groups_0 = const()[name = string("input_213_groups_0"), val = int32(1)]; tensor input_213 = conv(dilations = input_213_dilations_0, groups = input_213_groups_0, pad = input_213_pad_0, pad_type = input_213_pad_type_0, strides = input_213_strides_0, weight = model_model_layers_11_mlp_gate_proj_weight_palettized, x = input_211)[name = string("input_213")]; string b_23_pad_type_0 = const()[name = string("b_23_pad_type_0"), val = string("valid")]; tensor b_23_strides_0 = const()[name = string("b_23_strides_0"), val = tensor([1, 1])]; tensor b_23_pad_0 = const()[name = string("b_23_pad_0"), val = tensor([0, 0, 0, 0])]; tensor b_23_dilations_0 = const()[name = string("b_23_dilations_0"), val = tensor([1, 1])]; int32 b_23_groups_0 = const()[name = string("b_23_groups_0"), val = int32(1)]; tensor b_23 = conv(dilations = b_23_dilations_0, groups = b_23_groups_0, pad = b_23_pad_0, pad_type = b_23_pad_type_0, strides = b_23_strides_0, weight = model_model_layers_11_mlp_up_proj_weight_palettized, x = input_211)[name = string("b_23")]; tensor c_23 = silu(x = input_213)[name = string("c_23")]; tensor input_215 = mul(x = c_23, y = b_23)[name = string("input_215")]; string e_23_pad_type_0 = const()[name = string("e_23_pad_type_0"), val = string("valid")]; tensor e_23_strides_0 = const()[name = string("e_23_strides_0"), val = tensor([1, 1])]; tensor e_23_pad_0 = const()[name = string("e_23_pad_0"), val = tensor([0, 0, 0, 0])]; tensor e_23_dilations_0 = const()[name = string("e_23_dilations_0"), val = tensor([1, 1])]; int32 e_23_groups_0 = const()[name = string("e_23_groups_0"), val = int32(1)]; tensor e_23 = conv(dilations = e_23_dilations_0, groups = e_23_groups_0, pad = e_23_pad_0, pad_type = e_23_pad_type_0, strides = e_23_strides_0, weight = model_model_layers_11_mlp_down_proj_weight_palettized, x = input_215)[name = string("e_23")]; tensor var_7674_axes_0 = const()[name = string("op_7674_axes_0"), val = tensor([2])]; tensor var_7674 = squeeze(axes = var_7674_axes_0, x = e_23)[name = string("op_7674")]; tensor var_7675 = const()[name = string("op_7675"), val = tensor([0, 2, 1])]; tensor var_7676 = transpose(perm = var_7675, x = var_7674)[name = string("transpose_144")]; tensor hidden_states_73_cast_fp16 = add(x = hidden_states_71_cast_fp16, y = var_7676)[name = string("hidden_states_73_cast_fp16")]; tensor mean_97_axes_0 = const()[name = string("mean_97_axes_0"), val = tensor([-1])]; bool mean_97_keep_dims_0 = const()[name = string("mean_97_keep_dims_0"), val = bool(true)]; tensor mean_97_cast_fp16 = reduce_mean(axes = mean_97_axes_0, keep_dims = mean_97_keep_dims_0, x = hidden_states_73_cast_fp16)[name = string("mean_97_cast_fp16")]; tensor input_217_cast_fp16 = sub(x = hidden_states_73_cast_fp16, y = mean_97_cast_fp16)[name = string("input_217_cast_fp16")]; tensor var_7694_axes_0 = const()[name = string("op_7694_axes_0"), val = tensor([-1])]; tensor model_model_layers_12_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_12_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1048968896)))]; fp16 var_7682_to_fp16 = const()[name = string("op_7682_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_7694_cast_fp16 = layer_norm(axes = var_7694_axes_0, epsilon = var_7682_to_fp16, gamma = model_model_layers_12_input_layernorm_weight_to_fp16, x = input_217_cast_fp16)[name = string("op_7694_cast_fp16")]; tensor var_7706 = const()[name = string("op_7706"), val = tensor([0, 2, 1])]; tensor var_7709_axes_0 = const()[name = string("op_7709_axes_0"), val = tensor([2])]; tensor var_7707 = transpose(perm = var_7706, x = var_7694_cast_fp16)[name = string("transpose_143")]; tensor var_7709 = expand_dims(axes = var_7709_axes_0, x = var_7707)[name = string("op_7709")]; string query_states_97_pad_type_0 = const()[name = string("query_states_97_pad_type_0"), val = string("valid")]; tensor query_states_97_strides_0 = const()[name = string("query_states_97_strides_0"), val = tensor([1, 1])]; tensor query_states_97_pad_0 = const()[name = string("query_states_97_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_97_dilations_0 = const()[name = string("query_states_97_dilations_0"), val = tensor([1, 1])]; int32 query_states_97_groups_0 = const()[name = string("query_states_97_groups_0"), val = int32(1)]; tensor query_states_97 = conv(dilations = query_states_97_dilations_0, groups = query_states_97_groups_0, pad = query_states_97_pad_0, pad_type = query_states_97_pad_type_0, strides = query_states_97_strides_0, weight = model_model_layers_12_self_attn_q_proj_weight_palettized, x = var_7709)[name = string("query_states_97")]; string key_states_121_pad_type_0 = const()[name = string("key_states_121_pad_type_0"), val = string("valid")]; tensor key_states_121_strides_0 = const()[name = string("key_states_121_strides_0"), val = tensor([1, 1])]; tensor key_states_121_pad_0 = const()[name = string("key_states_121_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_121_dilations_0 = const()[name = string("key_states_121_dilations_0"), val = tensor([1, 1])]; int32 key_states_121_groups_0 = const()[name = string("key_states_121_groups_0"), val = int32(1)]; tensor key_states_121 = conv(dilations = key_states_121_dilations_0, groups = key_states_121_groups_0, pad = key_states_121_pad_0, pad_type = key_states_121_pad_type_0, strides = key_states_121_strides_0, weight = model_model_layers_12_self_attn_k_proj_weight_palettized, x = var_7709)[name = string("key_states_121")]; string value_states_97_pad_type_0 = const()[name = string("value_states_97_pad_type_0"), val = string("valid")]; tensor value_states_97_strides_0 = const()[name = string("value_states_97_strides_0"), val = tensor([1, 1])]; tensor value_states_97_pad_0 = const()[name = string("value_states_97_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_97_dilations_0 = const()[name = string("value_states_97_dilations_0"), val = tensor([1, 1])]; int32 value_states_97_groups_0 = const()[name = string("value_states_97_groups_0"), val = int32(1)]; tensor value_states_97 = conv(dilations = value_states_97_dilations_0, groups = value_states_97_groups_0, pad = value_states_97_pad_0, pad_type = value_states_97_pad_type_0, strides = value_states_97_strides_0, weight = model_model_layers_12_self_attn_v_proj_weight_palettized, x = var_7709)[name = string("value_states_97")]; tensor var_7751 = const()[name = string("op_7751"), val = tensor([1, 16, 128, 64])]; tensor var_7752 = reshape(shape = var_7751, x = query_states_97)[name = string("op_7752")]; tensor var_7757 = const()[name = string("op_7757"), val = tensor([0, 1, 3, 2])]; tensor var_7762 = const()[name = string("op_7762"), val = tensor([1, 8, 128, 64])]; tensor var_7763 = reshape(shape = var_7762, x = key_states_121)[name = string("op_7763")]; tensor var_7768 = const()[name = string("op_7768"), val = tensor([0, 1, 3, 2])]; tensor var_7773 = const()[name = string("op_7773"), val = tensor([1, 8, 128, 64])]; tensor var_7774 = reshape(shape = var_7773, x = value_states_97)[name = string("op_7774")]; tensor var_7779 = const()[name = string("op_7779"), val = tensor([0, 1, 3, 2])]; tensor mean_99_axes_0 = const()[name = string("mean_99_axes_0"), val = tensor([-1])]; bool mean_99_keep_dims_0 = const()[name = string("mean_99_keep_dims_0"), val = bool(true)]; tensor x_241 = transpose(perm = var_7757, x = var_7752)[name = string("transpose_142")]; tensor mean_99 = reduce_mean(axes = mean_99_axes_0, keep_dims = mean_99_keep_dims_0, x = x_241)[name = string("mean_99")]; tensor input_221 = sub(x = x_241, y = mean_99)[name = string("input_221")]; tensor var_7796_axes_0 = const()[name = string("op_7796_axes_0"), val = tensor([-1])]; tensor model_model_layers_12_self_attn_q_norm_weight_to_fp16 = const()[name = string("model_model_layers_12_self_attn_q_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1048973056)))]; fp16 var_7784_to_fp16 = const()[name = string("op_7784_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_7796_cast_fp16 = layer_norm(axes = var_7796_axes_0, epsilon = var_7784_to_fp16, gamma = model_model_layers_12_self_attn_q_norm_weight_to_fp16, x = input_221)[name = string("op_7796_cast_fp16")]; tensor mean_101_axes_0 = const()[name = string("mean_101_axes_0"), val = tensor([-1])]; bool mean_101_keep_dims_0 = const()[name = string("mean_101_keep_dims_0"), val = bool(true)]; tensor x_243 = transpose(perm = var_7768, x = var_7763)[name = string("transpose_141")]; tensor mean_101 = reduce_mean(axes = mean_101_axes_0, keep_dims = mean_101_keep_dims_0, x = x_243)[name = string("mean_101")]; tensor input_223 = sub(x = x_243, y = mean_101)[name = string("input_223")]; tensor var_7814_axes_0 = const()[name = string("op_7814_axes_0"), val = tensor([-1])]; tensor model_model_layers_12_self_attn_k_norm_weight_to_fp16 = const()[name = string("model_model_layers_12_self_attn_k_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1048973376)))]; fp16 var_7802_to_fp16 = const()[name = string("op_7802_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_7814_cast_fp16 = layer_norm(axes = var_7814_axes_0, epsilon = var_7802_to_fp16, gamma = model_model_layers_12_self_attn_k_norm_weight_to_fp16, x = input_223)[name = string("op_7814_cast_fp16")]; tensor var_7829 = mul(x = var_7796_cast_fp16, y = cos_5)[name = string("op_7829")]; tensor x1_49_begin_0 = const()[name = string("x1_49_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_49_end_0 = const()[name = string("x1_49_end_0"), val = tensor([1, 16, 64, 64])]; tensor x1_49_end_mask_0 = const()[name = string("x1_49_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_49 = slice_by_index(begin = x1_49_begin_0, end = x1_49_end_0, end_mask = x1_49_end_mask_0, x = var_7796_cast_fp16)[name = string("x1_49")]; tensor x2_49_begin_0 = const()[name = string("x2_49_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_49_end_0 = const()[name = string("x2_49_end_0"), val = tensor([1, 16, 64, 128])]; tensor x2_49_end_mask_0 = const()[name = string("x2_49_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_49 = slice_by_index(begin = x2_49_begin_0, end = x2_49_end_0, end_mask = x2_49_end_mask_0, x = var_7796_cast_fp16)[name = string("x2_49")]; fp16 const_271_promoted = const()[name = string("const_271_promoted"), val = fp16(-0x1p+0)]; tensor var_7850 = mul(x = x2_49, y = const_271_promoted)[name = string("op_7850")]; int32 var_7852 = const()[name = string("op_7852"), val = int32(-1)]; bool var_7853_interleave_0 = const()[name = string("op_7853_interleave_0"), val = bool(false)]; tensor var_7853 = concat(axis = var_7852, interleave = var_7853_interleave_0, values = (var_7850, x1_49))[name = string("op_7853")]; tensor var_7854 = mul(x = var_7853, y = sin_5)[name = string("op_7854")]; tensor query_states_99 = add(x = var_7829, y = var_7854)[name = string("query_states_99")]; tensor var_7857 = mul(x = var_7814_cast_fp16, y = cos_5)[name = string("op_7857")]; tensor x1_51_begin_0 = const()[name = string("x1_51_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_51_end_0 = const()[name = string("x1_51_end_0"), val = tensor([1, 8, 64, 64])]; tensor x1_51_end_mask_0 = const()[name = string("x1_51_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_51 = slice_by_index(begin = x1_51_begin_0, end = x1_51_end_0, end_mask = x1_51_end_mask_0, x = var_7814_cast_fp16)[name = string("x1_51")]; tensor x2_51_begin_0 = const()[name = string("x2_51_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_51_end_0 = const()[name = string("x2_51_end_0"), val = tensor([1, 8, 64, 128])]; tensor x2_51_end_mask_0 = const()[name = string("x2_51_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_51 = slice_by_index(begin = x2_51_begin_0, end = x2_51_end_0, end_mask = x2_51_end_mask_0, x = var_7814_cast_fp16)[name = string("x2_51")]; fp16 const_274_promoted = const()[name = string("const_274_promoted"), val = fp16(-0x1p+0)]; tensor var_7878 = mul(x = x2_51, y = const_274_promoted)[name = string("op_7878")]; int32 var_7880 = const()[name = string("op_7880"), val = int32(-1)]; bool var_7881_interleave_0 = const()[name = string("op_7881_interleave_0"), val = bool(false)]; tensor var_7881 = concat(axis = var_7880, interleave = var_7881_interleave_0, values = (var_7878, x1_51))[name = string("op_7881")]; tensor var_7882 = mul(x = var_7881, y = sin_5)[name = string("op_7882")]; tensor key_states_123 = add(x = var_7857, y = var_7882)[name = string("key_states_123")]; tensor expand_dims_144 = const()[name = string("expand_dims_144"), val = tensor([12])]; tensor expand_dims_145 = const()[name = string("expand_dims_145"), val = tensor([0])]; tensor expand_dims_147 = const()[name = string("expand_dims_147"), val = tensor([0])]; tensor expand_dims_148 = const()[name = string("expand_dims_148"), val = tensor([13])]; int32 concat_218_axis_0 = const()[name = string("concat_218_axis_0"), val = int32(0)]; bool concat_218_interleave_0 = const()[name = string("concat_218_interleave_0"), val = bool(false)]; tensor concat_218 = concat(axis = concat_218_axis_0, interleave = concat_218_interleave_0, values = (expand_dims_144, expand_dims_145, current_pos, expand_dims_147))[name = string("concat_218")]; tensor concat_219_values1_0 = const()[name = string("concat_219_values1_0"), val = tensor([0])]; tensor concat_219_values3_0 = const()[name = string("concat_219_values3_0"), val = tensor([0])]; int32 concat_219_axis_0 = const()[name = string("concat_219_axis_0"), val = int32(0)]; bool concat_219_interleave_0 = const()[name = string("concat_219_interleave_0"), val = bool(false)]; tensor concat_219 = concat(axis = concat_219_axis_0, interleave = concat_219_interleave_0, values = (expand_dims_148, concat_219_values1_0, var_1760, concat_219_values3_0))[name = string("concat_219")]; tensor model_model_kv_cache_0_internal_tensor_assign_25_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_25_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_25_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_25_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_25_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_25_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_25_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_25_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_25_cast_fp16 = slice_update(begin = concat_218, begin_mask = model_model_kv_cache_0_internal_tensor_assign_25_begin_mask_0, end = concat_219, end_mask = model_model_kv_cache_0_internal_tensor_assign_25_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_25_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_25_stride_0, update = key_states_123, x = coreml_update_state_79)[name = string("model_model_kv_cache_0_internal_tensor_assign_25_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_25_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_80_write_state")]; tensor coreml_update_state_80 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_80")]; tensor expand_dims_150 = const()[name = string("expand_dims_150"), val = tensor([40])]; tensor expand_dims_151 = const()[name = string("expand_dims_151"), val = tensor([0])]; tensor expand_dims_153 = const()[name = string("expand_dims_153"), val = tensor([0])]; tensor expand_dims_154 = const()[name = string("expand_dims_154"), val = tensor([41])]; int32 concat_222_axis_0 = const()[name = string("concat_222_axis_0"), val = int32(0)]; bool concat_222_interleave_0 = const()[name = string("concat_222_interleave_0"), val = bool(false)]; tensor concat_222 = concat(axis = concat_222_axis_0, interleave = concat_222_interleave_0, values = (expand_dims_150, expand_dims_151, current_pos, expand_dims_153))[name = string("concat_222")]; tensor concat_223_values1_0 = const()[name = string("concat_223_values1_0"), val = tensor([0])]; tensor concat_223_values3_0 = const()[name = string("concat_223_values3_0"), val = tensor([0])]; int32 concat_223_axis_0 = const()[name = string("concat_223_axis_0"), val = int32(0)]; bool concat_223_interleave_0 = const()[name = string("concat_223_interleave_0"), val = bool(false)]; tensor concat_223 = concat(axis = concat_223_axis_0, interleave = concat_223_interleave_0, values = (expand_dims_154, concat_223_values1_0, var_1760, concat_223_values3_0))[name = string("concat_223")]; tensor model_model_kv_cache_0_internal_tensor_assign_26_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_26_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_26_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_26_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_26_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_26_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_26_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_26_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_99 = transpose(perm = var_7779, x = var_7774)[name = string("transpose_140")]; tensor model_model_kv_cache_0_internal_tensor_assign_26_cast_fp16 = slice_update(begin = concat_222, begin_mask = model_model_kv_cache_0_internal_tensor_assign_26_begin_mask_0, end = concat_223, end_mask = model_model_kv_cache_0_internal_tensor_assign_26_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_26_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_26_stride_0, update = value_states_99, x = coreml_update_state_80)[name = string("model_model_kv_cache_0_internal_tensor_assign_26_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_26_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_81_write_state")]; tensor coreml_update_state_81 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_81")]; tensor var_7953_begin_0 = const()[name = string("op_7953_begin_0"), val = tensor([12, 0, 0, 0])]; tensor var_7953_end_0 = const()[name = string("op_7953_end_0"), val = tensor([13, 8, 1024, 128])]; tensor var_7953_end_mask_0 = const()[name = string("op_7953_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_7953_cast_fp16 = slice_by_index(begin = var_7953_begin_0, end = var_7953_end_0, end_mask = var_7953_end_mask_0, x = coreml_update_state_81)[name = string("op_7953_cast_fp16")]; tensor K_layer_cache_25_axes_0 = const()[name = string("K_layer_cache_25_axes_0"), val = tensor([0])]; tensor K_layer_cache_25_cast_fp16 = squeeze(axes = K_layer_cache_25_axes_0, x = var_7953_cast_fp16)[name = string("K_layer_cache_25_cast_fp16")]; tensor var_7960_begin_0 = const()[name = string("op_7960_begin_0"), val = tensor([40, 0, 0, 0])]; tensor var_7960_end_0 = const()[name = string("op_7960_end_0"), val = tensor([41, 8, 1024, 128])]; tensor var_7960_end_mask_0 = const()[name = string("op_7960_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_7960_cast_fp16 = slice_by_index(begin = var_7960_begin_0, end = var_7960_end_0, end_mask = var_7960_end_mask_0, x = coreml_update_state_81)[name = string("op_7960_cast_fp16")]; tensor V_layer_cache_25_axes_0 = const()[name = string("V_layer_cache_25_axes_0"), val = tensor([0])]; tensor V_layer_cache_25_cast_fp16 = squeeze(axes = V_layer_cache_25_axes_0, x = var_7960_cast_fp16)[name = string("V_layer_cache_25_cast_fp16")]; tensor x_247_axes_0 = const()[name = string("x_247_axes_0"), val = tensor([1])]; tensor x_247_cast_fp16 = expand_dims(axes = x_247_axes_0, x = K_layer_cache_25_cast_fp16)[name = string("x_247_cast_fp16")]; tensor var_7989 = const()[name = string("op_7989"), val = tensor([1, 2, 1, 1])]; tensor x_249_cast_fp16 = tile(reps = var_7989, x = x_247_cast_fp16)[name = string("x_249_cast_fp16")]; tensor var_8001 = const()[name = string("op_8001"), val = tensor([1, -1, 1024, 128])]; tensor key_states_127_cast_fp16 = reshape(shape = var_8001, x = x_249_cast_fp16)[name = string("key_states_127_cast_fp16")]; tensor x_253_axes_0 = const()[name = string("x_253_axes_0"), val = tensor([1])]; tensor x_253_cast_fp16 = expand_dims(axes = x_253_axes_0, x = V_layer_cache_25_cast_fp16)[name = string("x_253_cast_fp16")]; tensor var_8009 = const()[name = string("op_8009"), val = tensor([1, 2, 1, 1])]; tensor x_255_cast_fp16 = tile(reps = var_8009, x = x_253_cast_fp16)[name = string("x_255_cast_fp16")]; bool var_8036_transpose_x_0 = const()[name = string("op_8036_transpose_x_0"), val = bool(false)]; bool var_8036_transpose_y_0 = const()[name = string("op_8036_transpose_y_0"), val = bool(true)]; tensor var_8036 = matmul(transpose_x = var_8036_transpose_x_0, transpose_y = var_8036_transpose_y_0, x = query_states_99, y = key_states_127_cast_fp16)[name = string("op_8036")]; fp16 var_8037_to_fp16 = const()[name = string("op_8037_to_fp16"), val = fp16(0x1.6ap-4)]; tensor attn_weights_49_cast_fp16 = mul(x = var_8036, y = var_8037_to_fp16)[name = string("attn_weights_49_cast_fp16")]; tensor attn_weights_51_cast_fp16 = add(x = attn_weights_49_cast_fp16, y = causal_mask)[name = string("attn_weights_51_cast_fp16")]; int32 var_8072 = const()[name = string("op_8072"), val = int32(-1)]; tensor var_8074_cast_fp16 = softmax(axis = var_8072, x = attn_weights_51_cast_fp16)[name = string("op_8074_cast_fp16")]; tensor concat_228 = const()[name = string("concat_228"), val = tensor([16, 64, 1024])]; tensor reshape_36_cast_fp16 = reshape(shape = concat_228, x = var_8074_cast_fp16)[name = string("reshape_36_cast_fp16")]; tensor concat_229 = const()[name = string("concat_229"), val = tensor([16, 1024, 128])]; tensor reshape_37_cast_fp16 = reshape(shape = concat_229, x = x_255_cast_fp16)[name = string("reshape_37_cast_fp16")]; bool matmul_12_transpose_x_0 = const()[name = string("matmul_12_transpose_x_0"), val = bool(false)]; bool matmul_12_transpose_y_0 = const()[name = string("matmul_12_transpose_y_0"), val = bool(false)]; tensor matmul_12_cast_fp16 = matmul(transpose_x = matmul_12_transpose_x_0, transpose_y = matmul_12_transpose_y_0, x = reshape_36_cast_fp16, y = reshape_37_cast_fp16)[name = string("matmul_12_cast_fp16")]; tensor concat_233 = const()[name = string("concat_233"), val = tensor([1, 16, 64, 128])]; tensor reshape_38_cast_fp16 = reshape(shape = concat_233, x = matmul_12_cast_fp16)[name = string("reshape_38_cast_fp16")]; tensor var_8086_perm_0 = const()[name = string("op_8086_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_8105 = const()[name = string("op_8105"), val = tensor([1, 64, 2048])]; tensor var_8086_cast_fp16 = transpose(perm = var_8086_perm_0, x = reshape_38_cast_fp16)[name = string("transpose_139")]; tensor attn_output_125_cast_fp16 = reshape(shape = var_8105, x = var_8086_cast_fp16)[name = string("attn_output_125_cast_fp16")]; tensor var_8110 = const()[name = string("op_8110"), val = tensor([0, 2, 1])]; string var_8126_pad_type_0 = const()[name = string("op_8126_pad_type_0"), val = string("valid")]; int32 var_8126_groups_0 = const()[name = string("op_8126_groups_0"), val = int32(1)]; tensor var_8126_strides_0 = const()[name = string("op_8126_strides_0"), val = tensor([1])]; tensor var_8126_pad_0 = const()[name = string("op_8126_pad_0"), val = tensor([0, 0])]; tensor var_8126_dilations_0 = const()[name = string("op_8126_dilations_0"), val = tensor([1])]; tensor squeeze_12_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1048973696))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1052119488))))[name = string("squeeze_12_cast_fp16_to_fp32_to_fp16_palettized")]; tensor var_8111_cast_fp16 = transpose(perm = var_8110, x = attn_output_125_cast_fp16)[name = string("transpose_138")]; tensor var_8126_cast_fp16 = conv(dilations = var_8126_dilations_0, groups = var_8126_groups_0, pad = var_8126_pad_0, pad_type = var_8126_pad_type_0, strides = var_8126_strides_0, weight = squeeze_12_cast_fp16_to_fp32_to_fp16_palettized, x = var_8111_cast_fp16)[name = string("op_8126_cast_fp16")]; tensor var_8130 = const()[name = string("op_8130"), val = tensor([0, 2, 1])]; tensor attn_output_129_cast_fp16 = transpose(perm = var_8130, x = var_8126_cast_fp16)[name = string("transpose_137")]; tensor hidden_states_77_cast_fp16 = add(x = hidden_states_73_cast_fp16, y = attn_output_129_cast_fp16)[name = string("hidden_states_77_cast_fp16")]; tensor mean_103_axes_0 = const()[name = string("mean_103_axes_0"), val = tensor([-1])]; bool mean_103_keep_dims_0 = const()[name = string("mean_103_keep_dims_0"), val = bool(true)]; tensor mean_103_cast_fp16 = reduce_mean(axes = mean_103_axes_0, keep_dims = mean_103_keep_dims_0, x = hidden_states_77_cast_fp16)[name = string("mean_103_cast_fp16")]; tensor input_227_cast_fp16 = sub(x = hidden_states_77_cast_fp16, y = mean_103_cast_fp16)[name = string("input_227_cast_fp16")]; tensor var_8149_axes_0 = const()[name = string("op_8149_axes_0"), val = tensor([-1])]; tensor model_model_layers_12_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_12_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1052152320)))]; fp16 var_8137_to_fp16 = const()[name = string("op_8137_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_8149_cast_fp16 = layer_norm(axes = var_8149_axes_0, epsilon = var_8137_to_fp16, gamma = model_model_layers_12_post_attention_layernorm_weight_to_fp16, x = input_227_cast_fp16)[name = string("op_8149_cast_fp16")]; tensor var_8163 = const()[name = string("op_8163"), val = tensor([0, 2, 1])]; tensor input_229_axes_0 = const()[name = string("input_229_axes_0"), val = tensor([2])]; tensor var_8164 = transpose(perm = var_8163, x = var_8149_cast_fp16)[name = string("transpose_136")]; tensor input_229 = expand_dims(axes = input_229_axes_0, x = var_8164)[name = string("input_229")]; string input_231_pad_type_0 = const()[name = string("input_231_pad_type_0"), val = string("valid")]; tensor input_231_strides_0 = const()[name = string("input_231_strides_0"), val = tensor([1, 1])]; tensor input_231_pad_0 = const()[name = string("input_231_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_231_dilations_0 = const()[name = string("input_231_dilations_0"), val = tensor([1, 1])]; int32 input_231_groups_0 = const()[name = string("input_231_groups_0"), val = int32(1)]; tensor input_231 = conv(dilations = input_231_dilations_0, groups = input_231_groups_0, pad = input_231_pad_0, pad_type = input_231_pad_type_0, strides = input_231_strides_0, weight = model_model_layers_12_mlp_gate_proj_weight_palettized, x = input_229)[name = string("input_231")]; string b_25_pad_type_0 = const()[name = string("b_25_pad_type_0"), val = string("valid")]; tensor b_25_strides_0 = const()[name = string("b_25_strides_0"), val = tensor([1, 1])]; tensor b_25_pad_0 = const()[name = string("b_25_pad_0"), val = tensor([0, 0, 0, 0])]; tensor b_25_dilations_0 = const()[name = string("b_25_dilations_0"), val = tensor([1, 1])]; int32 b_25_groups_0 = const()[name = string("b_25_groups_0"), val = int32(1)]; tensor b_25 = conv(dilations = b_25_dilations_0, groups = b_25_groups_0, pad = b_25_pad_0, pad_type = b_25_pad_type_0, strides = b_25_strides_0, weight = model_model_layers_12_mlp_up_proj_weight_palettized, x = input_229)[name = string("b_25")]; tensor c_25 = silu(x = input_231)[name = string("c_25")]; tensor input_233 = mul(x = c_25, y = b_25)[name = string("input_233")]; string e_25_pad_type_0 = const()[name = string("e_25_pad_type_0"), val = string("valid")]; tensor e_25_strides_0 = const()[name = string("e_25_strides_0"), val = tensor([1, 1])]; tensor e_25_pad_0 = const()[name = string("e_25_pad_0"), val = tensor([0, 0, 0, 0])]; tensor e_25_dilations_0 = const()[name = string("e_25_dilations_0"), val = tensor([1, 1])]; int32 e_25_groups_0 = const()[name = string("e_25_groups_0"), val = int32(1)]; tensor e_25 = conv(dilations = e_25_dilations_0, groups = e_25_groups_0, pad = e_25_pad_0, pad_type = e_25_pad_type_0, strides = e_25_strides_0, weight = model_model_layers_12_mlp_down_proj_weight_palettized, x = input_233)[name = string("e_25")]; tensor var_8186_axes_0 = const()[name = string("op_8186_axes_0"), val = tensor([2])]; tensor var_8186 = squeeze(axes = var_8186_axes_0, x = e_25)[name = string("op_8186")]; tensor var_8187 = const()[name = string("op_8187"), val = tensor([0, 2, 1])]; tensor var_8188 = transpose(perm = var_8187, x = var_8186)[name = string("transpose_135")]; tensor hidden_states_79_cast_fp16 = add(x = hidden_states_77_cast_fp16, y = var_8188)[name = string("hidden_states_79_cast_fp16")]; tensor mean_105_axes_0 = const()[name = string("mean_105_axes_0"), val = tensor([-1])]; bool mean_105_keep_dims_0 = const()[name = string("mean_105_keep_dims_0"), val = bool(true)]; tensor mean_105_cast_fp16 = reduce_mean(axes = mean_105_axes_0, keep_dims = mean_105_keep_dims_0, x = hidden_states_79_cast_fp16)[name = string("mean_105_cast_fp16")]; tensor input_235_cast_fp16 = sub(x = hidden_states_79_cast_fp16, y = mean_105_cast_fp16)[name = string("input_235_cast_fp16")]; tensor var_8206_axes_0 = const()[name = string("op_8206_axes_0"), val = tensor([-1])]; tensor model_model_layers_13_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_13_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1052156480)))]; fp16 var_8194_to_fp16 = const()[name = string("op_8194_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_8206_cast_fp16 = layer_norm(axes = var_8206_axes_0, epsilon = var_8194_to_fp16, gamma = model_model_layers_13_input_layernorm_weight_to_fp16, x = input_235_cast_fp16)[name = string("op_8206_cast_fp16")]; tensor var_8218 = const()[name = string("op_8218"), val = tensor([0, 2, 1])]; tensor var_8221_axes_0 = const()[name = string("op_8221_axes_0"), val = tensor([2])]; tensor var_8219 = transpose(perm = var_8218, x = var_8206_cast_fp16)[name = string("transpose_134")]; tensor var_8221 = expand_dims(axes = var_8221_axes_0, x = var_8219)[name = string("op_8221")]; string query_states_105_pad_type_0 = const()[name = string("query_states_105_pad_type_0"), val = string("valid")]; tensor query_states_105_strides_0 = const()[name = string("query_states_105_strides_0"), val = tensor([1, 1])]; tensor query_states_105_pad_0 = const()[name = string("query_states_105_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_105_dilations_0 = const()[name = string("query_states_105_dilations_0"), val = tensor([1, 1])]; int32 query_states_105_groups_0 = const()[name = string("query_states_105_groups_0"), val = int32(1)]; tensor query_states_105 = conv(dilations = query_states_105_dilations_0, groups = query_states_105_groups_0, pad = query_states_105_pad_0, pad_type = query_states_105_pad_type_0, strides = query_states_105_strides_0, weight = model_model_layers_13_self_attn_q_proj_weight_palettized, x = var_8221)[name = string("query_states_105")]; string key_states_131_pad_type_0 = const()[name = string("key_states_131_pad_type_0"), val = string("valid")]; tensor key_states_131_strides_0 = const()[name = string("key_states_131_strides_0"), val = tensor([1, 1])]; tensor key_states_131_pad_0 = const()[name = string("key_states_131_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_131_dilations_0 = const()[name = string("key_states_131_dilations_0"), val = tensor([1, 1])]; int32 key_states_131_groups_0 = const()[name = string("key_states_131_groups_0"), val = int32(1)]; tensor key_states_131 = conv(dilations = key_states_131_dilations_0, groups = key_states_131_groups_0, pad = key_states_131_pad_0, pad_type = key_states_131_pad_type_0, strides = key_states_131_strides_0, weight = model_model_layers_13_self_attn_k_proj_weight_palettized, x = var_8221)[name = string("key_states_131")]; string value_states_105_pad_type_0 = const()[name = string("value_states_105_pad_type_0"), val = string("valid")]; tensor value_states_105_strides_0 = const()[name = string("value_states_105_strides_0"), val = tensor([1, 1])]; tensor value_states_105_pad_0 = const()[name = string("value_states_105_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_105_dilations_0 = const()[name = string("value_states_105_dilations_0"), val = tensor([1, 1])]; int32 value_states_105_groups_0 = const()[name = string("value_states_105_groups_0"), val = int32(1)]; tensor value_states_105 = conv(dilations = value_states_105_dilations_0, groups = value_states_105_groups_0, pad = value_states_105_pad_0, pad_type = value_states_105_pad_type_0, strides = value_states_105_strides_0, weight = model_model_layers_13_self_attn_v_proj_weight_palettized, x = var_8221)[name = string("value_states_105")]; tensor var_8263 = const()[name = string("op_8263"), val = tensor([1, 16, 128, 64])]; tensor var_8264 = reshape(shape = var_8263, x = query_states_105)[name = string("op_8264")]; tensor var_8269 = const()[name = string("op_8269"), val = tensor([0, 1, 3, 2])]; tensor var_8274 = const()[name = string("op_8274"), val = tensor([1, 8, 128, 64])]; tensor var_8275 = reshape(shape = var_8274, x = key_states_131)[name = string("op_8275")]; tensor var_8280 = const()[name = string("op_8280"), val = tensor([0, 1, 3, 2])]; tensor var_8285 = const()[name = string("op_8285"), val = tensor([1, 8, 128, 64])]; tensor var_8286 = reshape(shape = var_8285, x = value_states_105)[name = string("op_8286")]; tensor var_8291 = const()[name = string("op_8291"), val = tensor([0, 1, 3, 2])]; tensor mean_107_axes_0 = const()[name = string("mean_107_axes_0"), val = tensor([-1])]; bool mean_107_keep_dims_0 = const()[name = string("mean_107_keep_dims_0"), val = bool(true)]; tensor x_261 = transpose(perm = var_8269, x = var_8264)[name = string("transpose_133")]; tensor mean_107 = reduce_mean(axes = mean_107_axes_0, keep_dims = mean_107_keep_dims_0, x = x_261)[name = string("mean_107")]; tensor input_239 = sub(x = x_261, y = mean_107)[name = string("input_239")]; tensor var_8308_axes_0 = const()[name = string("op_8308_axes_0"), val = tensor([-1])]; tensor model_model_layers_13_self_attn_q_norm_weight_to_fp16 = const()[name = string("model_model_layers_13_self_attn_q_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1052160640)))]; fp16 var_8296_to_fp16 = const()[name = string("op_8296_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_8308_cast_fp16 = layer_norm(axes = var_8308_axes_0, epsilon = var_8296_to_fp16, gamma = model_model_layers_13_self_attn_q_norm_weight_to_fp16, x = input_239)[name = string("op_8308_cast_fp16")]; tensor mean_109_axes_0 = const()[name = string("mean_109_axes_0"), val = tensor([-1])]; bool mean_109_keep_dims_0 = const()[name = string("mean_109_keep_dims_0"), val = bool(true)]; tensor x_263 = transpose(perm = var_8280, x = var_8275)[name = string("transpose_132")]; tensor mean_109 = reduce_mean(axes = mean_109_axes_0, keep_dims = mean_109_keep_dims_0, x = x_263)[name = string("mean_109")]; tensor input_241 = sub(x = x_263, y = mean_109)[name = string("input_241")]; tensor var_8326_axes_0 = const()[name = string("op_8326_axes_0"), val = tensor([-1])]; tensor model_model_layers_13_self_attn_k_norm_weight_to_fp16 = const()[name = string("model_model_layers_13_self_attn_k_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1052160960)))]; fp16 var_8314_to_fp16 = const()[name = string("op_8314_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_8326_cast_fp16 = layer_norm(axes = var_8326_axes_0, epsilon = var_8314_to_fp16, gamma = model_model_layers_13_self_attn_k_norm_weight_to_fp16, x = input_241)[name = string("op_8326_cast_fp16")]; tensor var_8341 = mul(x = var_8308_cast_fp16, y = cos_5)[name = string("op_8341")]; tensor x1_53_begin_0 = const()[name = string("x1_53_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_53_end_0 = const()[name = string("x1_53_end_0"), val = tensor([1, 16, 64, 64])]; tensor x1_53_end_mask_0 = const()[name = string("x1_53_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_53 = slice_by_index(begin = x1_53_begin_0, end = x1_53_end_0, end_mask = x1_53_end_mask_0, x = var_8308_cast_fp16)[name = string("x1_53")]; tensor x2_53_begin_0 = const()[name = string("x2_53_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_53_end_0 = const()[name = string("x2_53_end_0"), val = tensor([1, 16, 64, 128])]; tensor x2_53_end_mask_0 = const()[name = string("x2_53_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_53 = slice_by_index(begin = x2_53_begin_0, end = x2_53_end_0, end_mask = x2_53_end_mask_0, x = var_8308_cast_fp16)[name = string("x2_53")]; fp16 const_293_promoted = const()[name = string("const_293_promoted"), val = fp16(-0x1p+0)]; tensor var_8362 = mul(x = x2_53, y = const_293_promoted)[name = string("op_8362")]; int32 var_8364 = const()[name = string("op_8364"), val = int32(-1)]; bool var_8365_interleave_0 = const()[name = string("op_8365_interleave_0"), val = bool(false)]; tensor var_8365 = concat(axis = var_8364, interleave = var_8365_interleave_0, values = (var_8362, x1_53))[name = string("op_8365")]; tensor var_8366 = mul(x = var_8365, y = sin_5)[name = string("op_8366")]; tensor query_states_107 = add(x = var_8341, y = var_8366)[name = string("query_states_107")]; tensor var_8369 = mul(x = var_8326_cast_fp16, y = cos_5)[name = string("op_8369")]; tensor x1_55_begin_0 = const()[name = string("x1_55_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_55_end_0 = const()[name = string("x1_55_end_0"), val = tensor([1, 8, 64, 64])]; tensor x1_55_end_mask_0 = const()[name = string("x1_55_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_55 = slice_by_index(begin = x1_55_begin_0, end = x1_55_end_0, end_mask = x1_55_end_mask_0, x = var_8326_cast_fp16)[name = string("x1_55")]; tensor x2_55_begin_0 = const()[name = string("x2_55_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_55_end_0 = const()[name = string("x2_55_end_0"), val = tensor([1, 8, 64, 128])]; tensor x2_55_end_mask_0 = const()[name = string("x2_55_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_55 = slice_by_index(begin = x2_55_begin_0, end = x2_55_end_0, end_mask = x2_55_end_mask_0, x = var_8326_cast_fp16)[name = string("x2_55")]; fp16 const_296_promoted = const()[name = string("const_296_promoted"), val = fp16(-0x1p+0)]; tensor var_8390 = mul(x = x2_55, y = const_296_promoted)[name = string("op_8390")]; int32 var_8392 = const()[name = string("op_8392"), val = int32(-1)]; bool var_8393_interleave_0 = const()[name = string("op_8393_interleave_0"), val = bool(false)]; tensor var_8393 = concat(axis = var_8392, interleave = var_8393_interleave_0, values = (var_8390, x1_55))[name = string("op_8393")]; tensor var_8394 = mul(x = var_8393, y = sin_5)[name = string("op_8394")]; tensor key_states_133 = add(x = var_8369, y = var_8394)[name = string("key_states_133")]; tensor expand_dims_156 = const()[name = string("expand_dims_156"), val = tensor([13])]; tensor expand_dims_157 = const()[name = string("expand_dims_157"), val = tensor([0])]; tensor expand_dims_159 = const()[name = string("expand_dims_159"), val = tensor([0])]; tensor expand_dims_160 = const()[name = string("expand_dims_160"), val = tensor([14])]; int32 concat_236_axis_0 = const()[name = string("concat_236_axis_0"), val = int32(0)]; bool concat_236_interleave_0 = const()[name = string("concat_236_interleave_0"), val = bool(false)]; tensor concat_236 = concat(axis = concat_236_axis_0, interleave = concat_236_interleave_0, values = (expand_dims_156, expand_dims_157, current_pos, expand_dims_159))[name = string("concat_236")]; tensor concat_237_values1_0 = const()[name = string("concat_237_values1_0"), val = tensor([0])]; tensor concat_237_values3_0 = const()[name = string("concat_237_values3_0"), val = tensor([0])]; int32 concat_237_axis_0 = const()[name = string("concat_237_axis_0"), val = int32(0)]; bool concat_237_interleave_0 = const()[name = string("concat_237_interleave_0"), val = bool(false)]; tensor concat_237 = concat(axis = concat_237_axis_0, interleave = concat_237_interleave_0, values = (expand_dims_160, concat_237_values1_0, var_1760, concat_237_values3_0))[name = string("concat_237")]; tensor model_model_kv_cache_0_internal_tensor_assign_27_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_27_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_27_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_27_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_27_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_27_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_27_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_27_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_27_cast_fp16 = slice_update(begin = concat_236, begin_mask = model_model_kv_cache_0_internal_tensor_assign_27_begin_mask_0, end = concat_237, end_mask = model_model_kv_cache_0_internal_tensor_assign_27_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_27_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_27_stride_0, update = key_states_133, x = coreml_update_state_81)[name = string("model_model_kv_cache_0_internal_tensor_assign_27_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_27_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_82_write_state")]; tensor coreml_update_state_82 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_82")]; tensor expand_dims_162 = const()[name = string("expand_dims_162"), val = tensor([41])]; tensor expand_dims_163 = const()[name = string("expand_dims_163"), val = tensor([0])]; tensor expand_dims_165 = const()[name = string("expand_dims_165"), val = tensor([0])]; tensor expand_dims_166 = const()[name = string("expand_dims_166"), val = tensor([42])]; int32 concat_240_axis_0 = const()[name = string("concat_240_axis_0"), val = int32(0)]; bool concat_240_interleave_0 = const()[name = string("concat_240_interleave_0"), val = bool(false)]; tensor concat_240 = concat(axis = concat_240_axis_0, interleave = concat_240_interleave_0, values = (expand_dims_162, expand_dims_163, current_pos, expand_dims_165))[name = string("concat_240")]; tensor concat_241_values1_0 = const()[name = string("concat_241_values1_0"), val = tensor([0])]; tensor concat_241_values3_0 = const()[name = string("concat_241_values3_0"), val = tensor([0])]; int32 concat_241_axis_0 = const()[name = string("concat_241_axis_0"), val = int32(0)]; bool concat_241_interleave_0 = const()[name = string("concat_241_interleave_0"), val = bool(false)]; tensor concat_241 = concat(axis = concat_241_axis_0, interleave = concat_241_interleave_0, values = (expand_dims_166, concat_241_values1_0, var_1760, concat_241_values3_0))[name = string("concat_241")]; tensor model_model_kv_cache_0_internal_tensor_assign_28_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_28_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_28_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_28_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_28_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_28_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_28_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_28_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_107 = transpose(perm = var_8291, x = var_8286)[name = string("transpose_131")]; tensor model_model_kv_cache_0_internal_tensor_assign_28_cast_fp16 = slice_update(begin = concat_240, begin_mask = model_model_kv_cache_0_internal_tensor_assign_28_begin_mask_0, end = concat_241, end_mask = model_model_kv_cache_0_internal_tensor_assign_28_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_28_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_28_stride_0, update = value_states_107, x = coreml_update_state_82)[name = string("model_model_kv_cache_0_internal_tensor_assign_28_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_28_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_83_write_state")]; tensor coreml_update_state_83 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_83")]; tensor var_8465_begin_0 = const()[name = string("op_8465_begin_0"), val = tensor([13, 0, 0, 0])]; tensor var_8465_end_0 = const()[name = string("op_8465_end_0"), val = tensor([14, 8, 1024, 128])]; tensor var_8465_end_mask_0 = const()[name = string("op_8465_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_8465_cast_fp16 = slice_by_index(begin = var_8465_begin_0, end = var_8465_end_0, end_mask = var_8465_end_mask_0, x = coreml_update_state_83)[name = string("op_8465_cast_fp16")]; tensor K_layer_cache_27_axes_0 = const()[name = string("K_layer_cache_27_axes_0"), val = tensor([0])]; tensor K_layer_cache_27_cast_fp16 = squeeze(axes = K_layer_cache_27_axes_0, x = var_8465_cast_fp16)[name = string("K_layer_cache_27_cast_fp16")]; tensor var_8472_begin_0 = const()[name = string("op_8472_begin_0"), val = tensor([41, 0, 0, 0])]; tensor var_8472_end_0 = const()[name = string("op_8472_end_0"), val = tensor([42, 8, 1024, 128])]; tensor var_8472_end_mask_0 = const()[name = string("op_8472_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_8472_cast_fp16 = slice_by_index(begin = var_8472_begin_0, end = var_8472_end_0, end_mask = var_8472_end_mask_0, x = coreml_update_state_83)[name = string("op_8472_cast_fp16")]; tensor V_layer_cache_27_axes_0 = const()[name = string("V_layer_cache_27_axes_0"), val = tensor([0])]; tensor V_layer_cache_27_cast_fp16 = squeeze(axes = V_layer_cache_27_axes_0, x = var_8472_cast_fp16)[name = string("V_layer_cache_27_cast_fp16")]; tensor x_267_axes_0 = const()[name = string("x_267_axes_0"), val = tensor([1])]; tensor x_267_cast_fp16 = expand_dims(axes = x_267_axes_0, x = K_layer_cache_27_cast_fp16)[name = string("x_267_cast_fp16")]; tensor var_8501 = const()[name = string("op_8501"), val = tensor([1, 2, 1, 1])]; tensor x_269_cast_fp16 = tile(reps = var_8501, x = x_267_cast_fp16)[name = string("x_269_cast_fp16")]; tensor var_8513 = const()[name = string("op_8513"), val = tensor([1, -1, 1024, 128])]; tensor key_states_137_cast_fp16 = reshape(shape = var_8513, x = x_269_cast_fp16)[name = string("key_states_137_cast_fp16")]; tensor x_273_axes_0 = const()[name = string("x_273_axes_0"), val = tensor([1])]; tensor x_273_cast_fp16 = expand_dims(axes = x_273_axes_0, x = V_layer_cache_27_cast_fp16)[name = string("x_273_cast_fp16")]; tensor var_8521 = const()[name = string("op_8521"), val = tensor([1, 2, 1, 1])]; tensor x_275_cast_fp16 = tile(reps = var_8521, x = x_273_cast_fp16)[name = string("x_275_cast_fp16")]; bool var_8548_transpose_x_0 = const()[name = string("op_8548_transpose_x_0"), val = bool(false)]; bool var_8548_transpose_y_0 = const()[name = string("op_8548_transpose_y_0"), val = bool(true)]; tensor var_8548 = matmul(transpose_x = var_8548_transpose_x_0, transpose_y = var_8548_transpose_y_0, x = query_states_107, y = key_states_137_cast_fp16)[name = string("op_8548")]; fp16 var_8549_to_fp16 = const()[name = string("op_8549_to_fp16"), val = fp16(0x1.6ap-4)]; tensor attn_weights_53_cast_fp16 = mul(x = var_8548, y = var_8549_to_fp16)[name = string("attn_weights_53_cast_fp16")]; tensor attn_weights_55_cast_fp16 = add(x = attn_weights_53_cast_fp16, y = causal_mask)[name = string("attn_weights_55_cast_fp16")]; int32 var_8584 = const()[name = string("op_8584"), val = int32(-1)]; tensor var_8586_cast_fp16 = softmax(axis = var_8584, x = attn_weights_55_cast_fp16)[name = string("op_8586_cast_fp16")]; tensor concat_246 = const()[name = string("concat_246"), val = tensor([16, 64, 1024])]; tensor reshape_39_cast_fp16 = reshape(shape = concat_246, x = var_8586_cast_fp16)[name = string("reshape_39_cast_fp16")]; tensor concat_247 = const()[name = string("concat_247"), val = tensor([16, 1024, 128])]; tensor reshape_40_cast_fp16 = reshape(shape = concat_247, x = x_275_cast_fp16)[name = string("reshape_40_cast_fp16")]; bool matmul_13_transpose_x_0 = const()[name = string("matmul_13_transpose_x_0"), val = bool(false)]; bool matmul_13_transpose_y_0 = const()[name = string("matmul_13_transpose_y_0"), val = bool(false)]; tensor matmul_13_cast_fp16 = matmul(transpose_x = matmul_13_transpose_x_0, transpose_y = matmul_13_transpose_y_0, x = reshape_39_cast_fp16, y = reshape_40_cast_fp16)[name = string("matmul_13_cast_fp16")]; tensor concat_251 = const()[name = string("concat_251"), val = tensor([1, 16, 64, 128])]; tensor reshape_41_cast_fp16 = reshape(shape = concat_251, x = matmul_13_cast_fp16)[name = string("reshape_41_cast_fp16")]; tensor var_8598_perm_0 = const()[name = string("op_8598_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_8617 = const()[name = string("op_8617"), val = tensor([1, 64, 2048])]; tensor var_8598_cast_fp16 = transpose(perm = var_8598_perm_0, x = reshape_41_cast_fp16)[name = string("transpose_130")]; tensor attn_output_135_cast_fp16 = reshape(shape = var_8617, x = var_8598_cast_fp16)[name = string("attn_output_135_cast_fp16")]; tensor var_8622 = const()[name = string("op_8622"), val = tensor([0, 2, 1])]; string var_8638_pad_type_0 = const()[name = string("op_8638_pad_type_0"), val = string("valid")]; int32 var_8638_groups_0 = const()[name = string("op_8638_groups_0"), val = int32(1)]; tensor var_8638_strides_0 = const()[name = string("op_8638_strides_0"), val = tensor([1])]; tensor var_8638_pad_0 = const()[name = string("op_8638_pad_0"), val = tensor([0, 0])]; tensor var_8638_dilations_0 = const()[name = string("op_8638_dilations_0"), val = tensor([1])]; tensor squeeze_13_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1052161280))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1055307072))))[name = string("squeeze_13_cast_fp16_to_fp32_to_fp16_palettized")]; tensor var_8623_cast_fp16 = transpose(perm = var_8622, x = attn_output_135_cast_fp16)[name = string("transpose_129")]; tensor var_8638_cast_fp16 = conv(dilations = var_8638_dilations_0, groups = var_8638_groups_0, pad = var_8638_pad_0, pad_type = var_8638_pad_type_0, strides = var_8638_strides_0, weight = squeeze_13_cast_fp16_to_fp32_to_fp16_palettized, x = var_8623_cast_fp16)[name = string("op_8638_cast_fp16")]; tensor var_8642 = const()[name = string("op_8642"), val = tensor([0, 2, 1])]; tensor attn_output_139_cast_fp16 = transpose(perm = var_8642, x = var_8638_cast_fp16)[name = string("transpose_128")]; tensor hidden_states_83_cast_fp16 = add(x = hidden_states_79_cast_fp16, y = attn_output_139_cast_fp16)[name = string("hidden_states_83_cast_fp16")]; tensor mean_111_axes_0 = const()[name = string("mean_111_axes_0"), val = tensor([-1])]; bool mean_111_keep_dims_0 = const()[name = string("mean_111_keep_dims_0"), val = bool(true)]; tensor mean_111_cast_fp16 = reduce_mean(axes = mean_111_axes_0, keep_dims = mean_111_keep_dims_0, x = hidden_states_83_cast_fp16)[name = string("mean_111_cast_fp16")]; tensor input_245_cast_fp16 = sub(x = hidden_states_83_cast_fp16, y = mean_111_cast_fp16)[name = string("input_245_cast_fp16")]; tensor var_8661_axes_0 = const()[name = string("op_8661_axes_0"), val = tensor([-1])]; tensor model_model_layers_13_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_13_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1055339904)))]; fp16 var_8649_to_fp16 = const()[name = string("op_8649_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_8661_cast_fp16 = layer_norm(axes = var_8661_axes_0, epsilon = var_8649_to_fp16, gamma = model_model_layers_13_post_attention_layernorm_weight_to_fp16, x = input_245_cast_fp16)[name = string("op_8661_cast_fp16")]; tensor var_8675 = const()[name = string("op_8675"), val = tensor([0, 2, 1])]; tensor input_247_axes_0 = const()[name = string("input_247_axes_0"), val = tensor([2])]; tensor var_8676 = transpose(perm = var_8675, x = var_8661_cast_fp16)[name = string("transpose_127")]; tensor input_247 = expand_dims(axes = input_247_axes_0, x = var_8676)[name = string("input_247")]; string input_249_pad_type_0 = const()[name = string("input_249_pad_type_0"), val = string("valid")]; tensor input_249_strides_0 = const()[name = string("input_249_strides_0"), val = tensor([1, 1])]; tensor input_249_pad_0 = const()[name = string("input_249_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_249_dilations_0 = const()[name = string("input_249_dilations_0"), val = tensor([1, 1])]; int32 input_249_groups_0 = const()[name = string("input_249_groups_0"), val = int32(1)]; tensor input_249 = conv(dilations = input_249_dilations_0, groups = input_249_groups_0, pad = input_249_pad_0, pad_type = input_249_pad_type_0, strides = input_249_strides_0, weight = model_model_layers_13_mlp_gate_proj_weight_palettized, x = input_247)[name = string("input_249")]; string b_27_pad_type_0 = const()[name = string("b_27_pad_type_0"), val = string("valid")]; tensor b_27_strides_0 = const()[name = string("b_27_strides_0"), val = tensor([1, 1])]; tensor b_27_pad_0 = const()[name = string("b_27_pad_0"), val = tensor([0, 0, 0, 0])]; tensor b_27_dilations_0 = const()[name = string("b_27_dilations_0"), val = tensor([1, 1])]; int32 b_27_groups_0 = const()[name = string("b_27_groups_0"), val = int32(1)]; tensor b_27 = conv(dilations = b_27_dilations_0, groups = b_27_groups_0, pad = b_27_pad_0, pad_type = b_27_pad_type_0, strides = b_27_strides_0, weight = model_model_layers_13_mlp_up_proj_weight_palettized, x = input_247)[name = string("b_27")]; tensor c_27 = silu(x = input_249)[name = string("c_27")]; tensor input_251 = mul(x = c_27, y = b_27)[name = string("input_251")]; string e_27_pad_type_0 = const()[name = string("e_27_pad_type_0"), val = string("valid")]; tensor e_27_strides_0 = const()[name = string("e_27_strides_0"), val = tensor([1, 1])]; tensor e_27_pad_0 = const()[name = string("e_27_pad_0"), val = tensor([0, 0, 0, 0])]; tensor e_27_dilations_0 = const()[name = string("e_27_dilations_0"), val = tensor([1, 1])]; int32 e_27_groups_0 = const()[name = string("e_27_groups_0"), val = int32(1)]; tensor e_27 = conv(dilations = e_27_dilations_0, groups = e_27_groups_0, pad = e_27_pad_0, pad_type = e_27_pad_type_0, strides = e_27_strides_0, weight = model_model_layers_13_mlp_down_proj_weight_palettized, x = input_251)[name = string("e_27")]; tensor var_8698_axes_0 = const()[name = string("op_8698_axes_0"), val = tensor([2])]; tensor var_8698 = squeeze(axes = var_8698_axes_0, x = e_27)[name = string("op_8698")]; tensor var_8699 = const()[name = string("op_8699"), val = tensor([0, 2, 1])]; tensor var_8700 = transpose(perm = var_8699, x = var_8698)[name = string("transpose_126")]; tensor hidden_states_85_cast_fp16 = add(x = hidden_states_83_cast_fp16, y = var_8700)[name = string("hidden_states_85_cast_fp16")]; tensor mean_113_axes_0 = const()[name = string("mean_113_axes_0"), val = tensor([-1])]; bool mean_113_keep_dims_0 = const()[name = string("mean_113_keep_dims_0"), val = bool(true)]; tensor mean_113_cast_fp16 = reduce_mean(axes = mean_113_axes_0, keep_dims = mean_113_keep_dims_0, x = hidden_states_85_cast_fp16)[name = string("mean_113_cast_fp16")]; tensor input_253_cast_fp16 = sub(x = hidden_states_85_cast_fp16, y = mean_113_cast_fp16)[name = string("input_253_cast_fp16")]; tensor var_8718_axes_0 = const()[name = string("op_8718_axes_0"), val = tensor([-1])]; tensor model_model_layers_14_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_14_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1055344064)))]; fp16 var_8706_to_fp16 = const()[name = string("op_8706_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_8718_cast_fp16 = layer_norm(axes = var_8718_axes_0, epsilon = var_8706_to_fp16, gamma = model_model_layers_14_input_layernorm_weight_to_fp16, x = input_253_cast_fp16)[name = string("op_8718_cast_fp16")]; tensor var_8730 = const()[name = string("op_8730"), val = tensor([0, 2, 1])]; tensor var_8733_axes_0 = const()[name = string("op_8733_axes_0"), val = tensor([2])]; tensor var_8731 = transpose(perm = var_8730, x = var_8718_cast_fp16)[name = string("transpose_125")]; tensor var_8733 = expand_dims(axes = var_8733_axes_0, x = var_8731)[name = string("op_8733")]; string query_states_113_pad_type_0 = const()[name = string("query_states_113_pad_type_0"), val = string("valid")]; tensor query_states_113_strides_0 = const()[name = string("query_states_113_strides_0"), val = tensor([1, 1])]; tensor query_states_113_pad_0 = const()[name = string("query_states_113_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_113_dilations_0 = const()[name = string("query_states_113_dilations_0"), val = tensor([1, 1])]; int32 query_states_113_groups_0 = const()[name = string("query_states_113_groups_0"), val = int32(1)]; tensor query_states_113 = conv(dilations = query_states_113_dilations_0, groups = query_states_113_groups_0, pad = query_states_113_pad_0, pad_type = query_states_113_pad_type_0, strides = query_states_113_strides_0, weight = model_model_layers_14_self_attn_q_proj_weight_palettized, x = var_8733)[name = string("query_states_113")]; string key_states_141_pad_type_0 = const()[name = string("key_states_141_pad_type_0"), val = string("valid")]; tensor key_states_141_strides_0 = const()[name = string("key_states_141_strides_0"), val = tensor([1, 1])]; tensor key_states_141_pad_0 = const()[name = string("key_states_141_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_141_dilations_0 = const()[name = string("key_states_141_dilations_0"), val = tensor([1, 1])]; int32 key_states_141_groups_0 = const()[name = string("key_states_141_groups_0"), val = int32(1)]; tensor key_states_141 = conv(dilations = key_states_141_dilations_0, groups = key_states_141_groups_0, pad = key_states_141_pad_0, pad_type = key_states_141_pad_type_0, strides = key_states_141_strides_0, weight = model_model_layers_14_self_attn_k_proj_weight_palettized, x = var_8733)[name = string("key_states_141")]; string value_states_113_pad_type_0 = const()[name = string("value_states_113_pad_type_0"), val = string("valid")]; tensor value_states_113_strides_0 = const()[name = string("value_states_113_strides_0"), val = tensor([1, 1])]; tensor value_states_113_pad_0 = const()[name = string("value_states_113_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_113_dilations_0 = const()[name = string("value_states_113_dilations_0"), val = tensor([1, 1])]; int32 value_states_113_groups_0 = const()[name = string("value_states_113_groups_0"), val = int32(1)]; tensor value_states_113 = conv(dilations = value_states_113_dilations_0, groups = value_states_113_groups_0, pad = value_states_113_pad_0, pad_type = value_states_113_pad_type_0, strides = value_states_113_strides_0, weight = model_model_layers_14_self_attn_v_proj_weight_palettized, x = var_8733)[name = string("value_states_113")]; tensor var_8775 = const()[name = string("op_8775"), val = tensor([1, 16, 128, 64])]; tensor var_8776 = reshape(shape = var_8775, x = query_states_113)[name = string("op_8776")]; tensor var_8781 = const()[name = string("op_8781"), val = tensor([0, 1, 3, 2])]; tensor var_8786 = const()[name = string("op_8786"), val = tensor([1, 8, 128, 64])]; tensor var_8787 = reshape(shape = var_8786, x = key_states_141)[name = string("op_8787")]; tensor var_8792 = const()[name = string("op_8792"), val = tensor([0, 1, 3, 2])]; tensor var_8797 = const()[name = string("op_8797"), val = tensor([1, 8, 128, 64])]; tensor var_8798 = reshape(shape = var_8797, x = value_states_113)[name = string("op_8798")]; tensor var_8803 = const()[name = string("op_8803"), val = tensor([0, 1, 3, 2])]; tensor mean_115_axes_0 = const()[name = string("mean_115_axes_0"), val = tensor([-1])]; bool mean_115_keep_dims_0 = const()[name = string("mean_115_keep_dims_0"), val = bool(true)]; tensor x_281 = transpose(perm = var_8781, x = var_8776)[name = string("transpose_124")]; tensor mean_115 = reduce_mean(axes = mean_115_axes_0, keep_dims = mean_115_keep_dims_0, x = x_281)[name = string("mean_115")]; tensor input_257 = sub(x = x_281, y = mean_115)[name = string("input_257")]; tensor var_8820_axes_0 = const()[name = string("op_8820_axes_0"), val = tensor([-1])]; tensor model_model_layers_14_self_attn_q_norm_weight_to_fp16 = const()[name = string("model_model_layers_14_self_attn_q_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1055348224)))]; fp16 var_8808_to_fp16 = const()[name = string("op_8808_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_8820_cast_fp16 = layer_norm(axes = var_8820_axes_0, epsilon = var_8808_to_fp16, gamma = model_model_layers_14_self_attn_q_norm_weight_to_fp16, x = input_257)[name = string("op_8820_cast_fp16")]; tensor mean_117_axes_0 = const()[name = string("mean_117_axes_0"), val = tensor([-1])]; bool mean_117_keep_dims_0 = const()[name = string("mean_117_keep_dims_0"), val = bool(true)]; tensor x_283 = transpose(perm = var_8792, x = var_8787)[name = string("transpose_123")]; tensor mean_117 = reduce_mean(axes = mean_117_axes_0, keep_dims = mean_117_keep_dims_0, x = x_283)[name = string("mean_117")]; tensor input_259 = sub(x = x_283, y = mean_117)[name = string("input_259")]; tensor var_8838_axes_0 = const()[name = string("op_8838_axes_0"), val = tensor([-1])]; tensor model_model_layers_14_self_attn_k_norm_weight_to_fp16 = const()[name = string("model_model_layers_14_self_attn_k_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1055348544)))]; fp16 var_8826_to_fp16 = const()[name = string("op_8826_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_8838_cast_fp16 = layer_norm(axes = var_8838_axes_0, epsilon = var_8826_to_fp16, gamma = model_model_layers_14_self_attn_k_norm_weight_to_fp16, x = input_259)[name = string("op_8838_cast_fp16")]; tensor var_8853 = mul(x = var_8820_cast_fp16, y = cos_5)[name = string("op_8853")]; tensor x1_57_begin_0 = const()[name = string("x1_57_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_57_end_0 = const()[name = string("x1_57_end_0"), val = tensor([1, 16, 64, 64])]; tensor x1_57_end_mask_0 = const()[name = string("x1_57_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_57 = slice_by_index(begin = x1_57_begin_0, end = x1_57_end_0, end_mask = x1_57_end_mask_0, x = var_8820_cast_fp16)[name = string("x1_57")]; tensor x2_57_begin_0 = const()[name = string("x2_57_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_57_end_0 = const()[name = string("x2_57_end_0"), val = tensor([1, 16, 64, 128])]; tensor x2_57_end_mask_0 = const()[name = string("x2_57_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_57 = slice_by_index(begin = x2_57_begin_0, end = x2_57_end_0, end_mask = x2_57_end_mask_0, x = var_8820_cast_fp16)[name = string("x2_57")]; fp16 const_315_promoted = const()[name = string("const_315_promoted"), val = fp16(-0x1p+0)]; tensor var_8874 = mul(x = x2_57, y = const_315_promoted)[name = string("op_8874")]; int32 var_8876 = const()[name = string("op_8876"), val = int32(-1)]; bool var_8877_interleave_0 = const()[name = string("op_8877_interleave_0"), val = bool(false)]; tensor var_8877 = concat(axis = var_8876, interleave = var_8877_interleave_0, values = (var_8874, x1_57))[name = string("op_8877")]; tensor var_8878 = mul(x = var_8877, y = sin_5)[name = string("op_8878")]; tensor query_states_115 = add(x = var_8853, y = var_8878)[name = string("query_states_115")]; tensor var_8881 = mul(x = var_8838_cast_fp16, y = cos_5)[name = string("op_8881")]; tensor x1_59_begin_0 = const()[name = string("x1_59_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_59_end_0 = const()[name = string("x1_59_end_0"), val = tensor([1, 8, 64, 64])]; tensor x1_59_end_mask_0 = const()[name = string("x1_59_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_59 = slice_by_index(begin = x1_59_begin_0, end = x1_59_end_0, end_mask = x1_59_end_mask_0, x = var_8838_cast_fp16)[name = string("x1_59")]; tensor x2_59_begin_0 = const()[name = string("x2_59_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_59_end_0 = const()[name = string("x2_59_end_0"), val = tensor([1, 8, 64, 128])]; tensor x2_59_end_mask_0 = const()[name = string("x2_59_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_59 = slice_by_index(begin = x2_59_begin_0, end = x2_59_end_0, end_mask = x2_59_end_mask_0, x = var_8838_cast_fp16)[name = string("x2_59")]; fp16 const_318_promoted = const()[name = string("const_318_promoted"), val = fp16(-0x1p+0)]; tensor var_8902 = mul(x = x2_59, y = const_318_promoted)[name = string("op_8902")]; int32 var_8904 = const()[name = string("op_8904"), val = int32(-1)]; bool var_8905_interleave_0 = const()[name = string("op_8905_interleave_0"), val = bool(false)]; tensor var_8905 = concat(axis = var_8904, interleave = var_8905_interleave_0, values = (var_8902, x1_59))[name = string("op_8905")]; tensor var_8906 = mul(x = var_8905, y = sin_5)[name = string("op_8906")]; tensor key_states_143 = add(x = var_8881, y = var_8906)[name = string("key_states_143")]; tensor expand_dims_168 = const()[name = string("expand_dims_168"), val = tensor([14])]; tensor expand_dims_169 = const()[name = string("expand_dims_169"), val = tensor([0])]; tensor expand_dims_171 = const()[name = string("expand_dims_171"), val = tensor([0])]; tensor expand_dims_172 = const()[name = string("expand_dims_172"), val = tensor([15])]; int32 concat_254_axis_0 = const()[name = string("concat_254_axis_0"), val = int32(0)]; bool concat_254_interleave_0 = const()[name = string("concat_254_interleave_0"), val = bool(false)]; tensor concat_254 = concat(axis = concat_254_axis_0, interleave = concat_254_interleave_0, values = (expand_dims_168, expand_dims_169, current_pos, expand_dims_171))[name = string("concat_254")]; tensor concat_255_values1_0 = const()[name = string("concat_255_values1_0"), val = tensor([0])]; tensor concat_255_values3_0 = const()[name = string("concat_255_values3_0"), val = tensor([0])]; int32 concat_255_axis_0 = const()[name = string("concat_255_axis_0"), val = int32(0)]; bool concat_255_interleave_0 = const()[name = string("concat_255_interleave_0"), val = bool(false)]; tensor concat_255 = concat(axis = concat_255_axis_0, interleave = concat_255_interleave_0, values = (expand_dims_172, concat_255_values1_0, var_1760, concat_255_values3_0))[name = string("concat_255")]; tensor model_model_kv_cache_0_internal_tensor_assign_29_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_29_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_29_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_29_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_29_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_29_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_29_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_29_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_29_cast_fp16 = slice_update(begin = concat_254, begin_mask = model_model_kv_cache_0_internal_tensor_assign_29_begin_mask_0, end = concat_255, end_mask = model_model_kv_cache_0_internal_tensor_assign_29_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_29_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_29_stride_0, update = key_states_143, x = coreml_update_state_83)[name = string("model_model_kv_cache_0_internal_tensor_assign_29_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_29_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_84_write_state")]; tensor coreml_update_state_84 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_84")]; tensor expand_dims_174 = const()[name = string("expand_dims_174"), val = tensor([42])]; tensor expand_dims_175 = const()[name = string("expand_dims_175"), val = tensor([0])]; tensor expand_dims_177 = const()[name = string("expand_dims_177"), val = tensor([0])]; tensor expand_dims_178 = const()[name = string("expand_dims_178"), val = tensor([43])]; int32 concat_258_axis_0 = const()[name = string("concat_258_axis_0"), val = int32(0)]; bool concat_258_interleave_0 = const()[name = string("concat_258_interleave_0"), val = bool(false)]; tensor concat_258 = concat(axis = concat_258_axis_0, interleave = concat_258_interleave_0, values = (expand_dims_174, expand_dims_175, current_pos, expand_dims_177))[name = string("concat_258")]; tensor concat_259_values1_0 = const()[name = string("concat_259_values1_0"), val = tensor([0])]; tensor concat_259_values3_0 = const()[name = string("concat_259_values3_0"), val = tensor([0])]; int32 concat_259_axis_0 = const()[name = string("concat_259_axis_0"), val = int32(0)]; bool concat_259_interleave_0 = const()[name = string("concat_259_interleave_0"), val = bool(false)]; tensor concat_259 = concat(axis = concat_259_axis_0, interleave = concat_259_interleave_0, values = (expand_dims_178, concat_259_values1_0, var_1760, concat_259_values3_0))[name = string("concat_259")]; tensor model_model_kv_cache_0_internal_tensor_assign_30_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_30_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_30_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_30_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_30_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_30_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_30_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_30_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_115 = transpose(perm = var_8803, x = var_8798)[name = string("transpose_122")]; tensor model_model_kv_cache_0_internal_tensor_assign_30_cast_fp16 = slice_update(begin = concat_258, begin_mask = model_model_kv_cache_0_internal_tensor_assign_30_begin_mask_0, end = concat_259, end_mask = model_model_kv_cache_0_internal_tensor_assign_30_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_30_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_30_stride_0, update = value_states_115, x = coreml_update_state_84)[name = string("model_model_kv_cache_0_internal_tensor_assign_30_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_30_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_85_write_state")]; tensor coreml_update_state_85 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_85")]; tensor var_8977_begin_0 = const()[name = string("op_8977_begin_0"), val = tensor([14, 0, 0, 0])]; tensor var_8977_end_0 = const()[name = string("op_8977_end_0"), val = tensor([15, 8, 1024, 128])]; tensor var_8977_end_mask_0 = const()[name = string("op_8977_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_8977_cast_fp16 = slice_by_index(begin = var_8977_begin_0, end = var_8977_end_0, end_mask = var_8977_end_mask_0, x = coreml_update_state_85)[name = string("op_8977_cast_fp16")]; tensor K_layer_cache_29_axes_0 = const()[name = string("K_layer_cache_29_axes_0"), val = tensor([0])]; tensor K_layer_cache_29_cast_fp16 = squeeze(axes = K_layer_cache_29_axes_0, x = var_8977_cast_fp16)[name = string("K_layer_cache_29_cast_fp16")]; tensor var_8984_begin_0 = const()[name = string("op_8984_begin_0"), val = tensor([42, 0, 0, 0])]; tensor var_8984_end_0 = const()[name = string("op_8984_end_0"), val = tensor([43, 8, 1024, 128])]; tensor var_8984_end_mask_0 = const()[name = string("op_8984_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_8984_cast_fp16 = slice_by_index(begin = var_8984_begin_0, end = var_8984_end_0, end_mask = var_8984_end_mask_0, x = coreml_update_state_85)[name = string("op_8984_cast_fp16")]; tensor V_layer_cache_29_axes_0 = const()[name = string("V_layer_cache_29_axes_0"), val = tensor([0])]; tensor V_layer_cache_29_cast_fp16 = squeeze(axes = V_layer_cache_29_axes_0, x = var_8984_cast_fp16)[name = string("V_layer_cache_29_cast_fp16")]; tensor x_287_axes_0 = const()[name = string("x_287_axes_0"), val = tensor([1])]; tensor x_287_cast_fp16 = expand_dims(axes = x_287_axes_0, x = K_layer_cache_29_cast_fp16)[name = string("x_287_cast_fp16")]; tensor var_9013 = const()[name = string("op_9013"), val = tensor([1, 2, 1, 1])]; tensor x_289_cast_fp16 = tile(reps = var_9013, x = x_287_cast_fp16)[name = string("x_289_cast_fp16")]; tensor var_9025 = const()[name = string("op_9025"), val = tensor([1, -1, 1024, 128])]; tensor key_states_147_cast_fp16 = reshape(shape = var_9025, x = x_289_cast_fp16)[name = string("key_states_147_cast_fp16")]; tensor x_293_axes_0 = const()[name = string("x_293_axes_0"), val = tensor([1])]; tensor x_293_cast_fp16 = expand_dims(axes = x_293_axes_0, x = V_layer_cache_29_cast_fp16)[name = string("x_293_cast_fp16")]; tensor var_9033 = const()[name = string("op_9033"), val = tensor([1, 2, 1, 1])]; tensor x_295_cast_fp16 = tile(reps = var_9033, x = x_293_cast_fp16)[name = string("x_295_cast_fp16")]; bool var_9060_transpose_x_0 = const()[name = string("op_9060_transpose_x_0"), val = bool(false)]; bool var_9060_transpose_y_0 = const()[name = string("op_9060_transpose_y_0"), val = bool(true)]; tensor var_9060 = matmul(transpose_x = var_9060_transpose_x_0, transpose_y = var_9060_transpose_y_0, x = query_states_115, y = key_states_147_cast_fp16)[name = string("op_9060")]; fp16 var_9061_to_fp16 = const()[name = string("op_9061_to_fp16"), val = fp16(0x1.6ap-4)]; tensor attn_weights_57_cast_fp16 = mul(x = var_9060, y = var_9061_to_fp16)[name = string("attn_weights_57_cast_fp16")]; tensor attn_weights_59_cast_fp16 = add(x = attn_weights_57_cast_fp16, y = causal_mask)[name = string("attn_weights_59_cast_fp16")]; int32 var_9096 = const()[name = string("op_9096"), val = int32(-1)]; tensor var_9098_cast_fp16 = softmax(axis = var_9096, x = attn_weights_59_cast_fp16)[name = string("op_9098_cast_fp16")]; tensor concat_264 = const()[name = string("concat_264"), val = tensor([16, 64, 1024])]; tensor reshape_42_cast_fp16 = reshape(shape = concat_264, x = var_9098_cast_fp16)[name = string("reshape_42_cast_fp16")]; tensor concat_265 = const()[name = string("concat_265"), val = tensor([16, 1024, 128])]; tensor reshape_43_cast_fp16 = reshape(shape = concat_265, x = x_295_cast_fp16)[name = string("reshape_43_cast_fp16")]; bool matmul_14_transpose_x_0 = const()[name = string("matmul_14_transpose_x_0"), val = bool(false)]; bool matmul_14_transpose_y_0 = const()[name = string("matmul_14_transpose_y_0"), val = bool(false)]; tensor matmul_14_cast_fp16 = matmul(transpose_x = matmul_14_transpose_x_0, transpose_y = matmul_14_transpose_y_0, x = reshape_42_cast_fp16, y = reshape_43_cast_fp16)[name = string("matmul_14_cast_fp16")]; tensor concat_269 = const()[name = string("concat_269"), val = tensor([1, 16, 64, 128])]; tensor reshape_44_cast_fp16 = reshape(shape = concat_269, x = matmul_14_cast_fp16)[name = string("reshape_44_cast_fp16")]; tensor var_9110_perm_0 = const()[name = string("op_9110_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_9129 = const()[name = string("op_9129"), val = tensor([1, 64, 2048])]; tensor var_9110_cast_fp16 = transpose(perm = var_9110_perm_0, x = reshape_44_cast_fp16)[name = string("transpose_121")]; tensor attn_output_145_cast_fp16 = reshape(shape = var_9129, x = var_9110_cast_fp16)[name = string("attn_output_145_cast_fp16")]; tensor var_9134 = const()[name = string("op_9134"), val = tensor([0, 2, 1])]; string var_9150_pad_type_0 = const()[name = string("op_9150_pad_type_0"), val = string("valid")]; int32 var_9150_groups_0 = const()[name = string("op_9150_groups_0"), val = int32(1)]; tensor var_9150_strides_0 = const()[name = string("op_9150_strides_0"), val = tensor([1])]; tensor var_9150_pad_0 = const()[name = string("op_9150_pad_0"), val = tensor([0, 0])]; tensor var_9150_dilations_0 = const()[name = string("op_9150_dilations_0"), val = tensor([1])]; tensor squeeze_14_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1055348864))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1058494656))))[name = string("squeeze_14_cast_fp16_to_fp32_to_fp16_palettized")]; tensor var_9135_cast_fp16 = transpose(perm = var_9134, x = attn_output_145_cast_fp16)[name = string("transpose_120")]; tensor var_9150_cast_fp16 = conv(dilations = var_9150_dilations_0, groups = var_9150_groups_0, pad = var_9150_pad_0, pad_type = var_9150_pad_type_0, strides = var_9150_strides_0, weight = squeeze_14_cast_fp16_to_fp32_to_fp16_palettized, x = var_9135_cast_fp16)[name = string("op_9150_cast_fp16")]; tensor var_9154 = const()[name = string("op_9154"), val = tensor([0, 2, 1])]; tensor attn_output_149_cast_fp16 = transpose(perm = var_9154, x = var_9150_cast_fp16)[name = string("transpose_119")]; tensor hidden_states_89_cast_fp16 = add(x = hidden_states_85_cast_fp16, y = attn_output_149_cast_fp16)[name = string("hidden_states_89_cast_fp16")]; tensor mean_119_axes_0 = const()[name = string("mean_119_axes_0"), val = tensor([-1])]; bool mean_119_keep_dims_0 = const()[name = string("mean_119_keep_dims_0"), val = bool(true)]; tensor mean_119_cast_fp16 = reduce_mean(axes = mean_119_axes_0, keep_dims = mean_119_keep_dims_0, x = hidden_states_89_cast_fp16)[name = string("mean_119_cast_fp16")]; tensor input_263_cast_fp16 = sub(x = hidden_states_89_cast_fp16, y = mean_119_cast_fp16)[name = string("input_263_cast_fp16")]; tensor var_9173_axes_0 = const()[name = string("op_9173_axes_0"), val = tensor([-1])]; tensor model_model_layers_14_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_14_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1058527488)))]; fp16 var_9161_to_fp16 = const()[name = string("op_9161_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_9173_cast_fp16 = layer_norm(axes = var_9173_axes_0, epsilon = var_9161_to_fp16, gamma = model_model_layers_14_post_attention_layernorm_weight_to_fp16, x = input_263_cast_fp16)[name = string("op_9173_cast_fp16")]; tensor var_9187 = const()[name = string("op_9187"), val = tensor([0, 2, 1])]; tensor input_265_axes_0 = const()[name = string("input_265_axes_0"), val = tensor([2])]; tensor var_9188 = transpose(perm = var_9187, x = var_9173_cast_fp16)[name = string("transpose_118")]; tensor input_265 = expand_dims(axes = input_265_axes_0, x = var_9188)[name = string("input_265")]; string input_267_pad_type_0 = const()[name = string("input_267_pad_type_0"), val = string("valid")]; tensor input_267_strides_0 = const()[name = string("input_267_strides_0"), val = tensor([1, 1])]; tensor input_267_pad_0 = const()[name = string("input_267_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_267_dilations_0 = const()[name = string("input_267_dilations_0"), val = tensor([1, 1])]; int32 input_267_groups_0 = const()[name = string("input_267_groups_0"), val = int32(1)]; tensor input_267 = conv(dilations = input_267_dilations_0, groups = input_267_groups_0, pad = input_267_pad_0, pad_type = input_267_pad_type_0, strides = input_267_strides_0, weight = model_model_layers_14_mlp_gate_proj_weight_palettized, x = input_265)[name = string("input_267")]; string b_29_pad_type_0 = const()[name = string("b_29_pad_type_0"), val = string("valid")]; tensor b_29_strides_0 = const()[name = string("b_29_strides_0"), val = tensor([1, 1])]; tensor b_29_pad_0 = const()[name = string("b_29_pad_0"), val = tensor([0, 0, 0, 0])]; tensor b_29_dilations_0 = const()[name = string("b_29_dilations_0"), val = tensor([1, 1])]; int32 b_29_groups_0 = const()[name = string("b_29_groups_0"), val = int32(1)]; tensor b_29 = conv(dilations = b_29_dilations_0, groups = b_29_groups_0, pad = b_29_pad_0, pad_type = b_29_pad_type_0, strides = b_29_strides_0, weight = model_model_layers_14_mlp_up_proj_weight_palettized, x = input_265)[name = string("b_29")]; tensor c_29 = silu(x = input_267)[name = string("c_29")]; tensor input_269 = mul(x = c_29, y = b_29)[name = string("input_269")]; string e_29_pad_type_0 = const()[name = string("e_29_pad_type_0"), val = string("valid")]; tensor e_29_strides_0 = const()[name = string("e_29_strides_0"), val = tensor([1, 1])]; tensor e_29_pad_0 = const()[name = string("e_29_pad_0"), val = tensor([0, 0, 0, 0])]; tensor e_29_dilations_0 = const()[name = string("e_29_dilations_0"), val = tensor([1, 1])]; int32 e_29_groups_0 = const()[name = string("e_29_groups_0"), val = int32(1)]; tensor e_29 = conv(dilations = e_29_dilations_0, groups = e_29_groups_0, pad = e_29_pad_0, pad_type = e_29_pad_type_0, strides = e_29_strides_0, weight = model_model_layers_14_mlp_down_proj_weight_palettized, x = input_269)[name = string("e_29")]; tensor var_9210_axes_0 = const()[name = string("op_9210_axes_0"), val = tensor([2])]; tensor var_9210 = squeeze(axes = var_9210_axes_0, x = e_29)[name = string("op_9210")]; tensor var_9211 = const()[name = string("op_9211"), val = tensor([0, 2, 1])]; tensor var_9212 = transpose(perm = var_9211, x = var_9210)[name = string("transpose_117")]; tensor hidden_states_91_cast_fp16 = add(x = hidden_states_89_cast_fp16, y = var_9212)[name = string("hidden_states_91_cast_fp16")]; tensor mean_121_axes_0 = const()[name = string("mean_121_axes_0"), val = tensor([-1])]; bool mean_121_keep_dims_0 = const()[name = string("mean_121_keep_dims_0"), val = bool(true)]; tensor mean_121_cast_fp16 = reduce_mean(axes = mean_121_axes_0, keep_dims = mean_121_keep_dims_0, x = hidden_states_91_cast_fp16)[name = string("mean_121_cast_fp16")]; tensor input_271_cast_fp16 = sub(x = hidden_states_91_cast_fp16, y = mean_121_cast_fp16)[name = string("input_271_cast_fp16")]; tensor var_9230_axes_0 = const()[name = string("op_9230_axes_0"), val = tensor([-1])]; tensor model_model_layers_15_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_15_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1058531648)))]; fp16 var_9218_to_fp16 = const()[name = string("op_9218_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_9230_cast_fp16 = layer_norm(axes = var_9230_axes_0, epsilon = var_9218_to_fp16, gamma = model_model_layers_15_input_layernorm_weight_to_fp16, x = input_271_cast_fp16)[name = string("op_9230_cast_fp16")]; tensor var_9242 = const()[name = string("op_9242"), val = tensor([0, 2, 1])]; tensor var_9245_axes_0 = const()[name = string("op_9245_axes_0"), val = tensor([2])]; tensor var_9243 = transpose(perm = var_9242, x = var_9230_cast_fp16)[name = string("transpose_116")]; tensor var_9245 = expand_dims(axes = var_9245_axes_0, x = var_9243)[name = string("op_9245")]; string query_states_121_pad_type_0 = const()[name = string("query_states_121_pad_type_0"), val = string("valid")]; tensor query_states_121_strides_0 = const()[name = string("query_states_121_strides_0"), val = tensor([1, 1])]; tensor query_states_121_pad_0 = const()[name = string("query_states_121_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_121_dilations_0 = const()[name = string("query_states_121_dilations_0"), val = tensor([1, 1])]; int32 query_states_121_groups_0 = const()[name = string("query_states_121_groups_0"), val = int32(1)]; tensor query_states_121 = conv(dilations = query_states_121_dilations_0, groups = query_states_121_groups_0, pad = query_states_121_pad_0, pad_type = query_states_121_pad_type_0, strides = query_states_121_strides_0, weight = model_model_layers_15_self_attn_q_proj_weight_palettized, x = var_9245)[name = string("query_states_121")]; string key_states_151_pad_type_0 = const()[name = string("key_states_151_pad_type_0"), val = string("valid")]; tensor key_states_151_strides_0 = const()[name = string("key_states_151_strides_0"), val = tensor([1, 1])]; tensor key_states_151_pad_0 = const()[name = string("key_states_151_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_151_dilations_0 = const()[name = string("key_states_151_dilations_0"), val = tensor([1, 1])]; int32 key_states_151_groups_0 = const()[name = string("key_states_151_groups_0"), val = int32(1)]; tensor key_states_151 = conv(dilations = key_states_151_dilations_0, groups = key_states_151_groups_0, pad = key_states_151_pad_0, pad_type = key_states_151_pad_type_0, strides = key_states_151_strides_0, weight = model_model_layers_15_self_attn_k_proj_weight_palettized, x = var_9245)[name = string("key_states_151")]; string value_states_121_pad_type_0 = const()[name = string("value_states_121_pad_type_0"), val = string("valid")]; tensor value_states_121_strides_0 = const()[name = string("value_states_121_strides_0"), val = tensor([1, 1])]; tensor value_states_121_pad_0 = const()[name = string("value_states_121_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_121_dilations_0 = const()[name = string("value_states_121_dilations_0"), val = tensor([1, 1])]; int32 value_states_121_groups_0 = const()[name = string("value_states_121_groups_0"), val = int32(1)]; tensor value_states_121 = conv(dilations = value_states_121_dilations_0, groups = value_states_121_groups_0, pad = value_states_121_pad_0, pad_type = value_states_121_pad_type_0, strides = value_states_121_strides_0, weight = model_model_layers_15_self_attn_v_proj_weight_palettized, x = var_9245)[name = string("value_states_121")]; tensor var_9287 = const()[name = string("op_9287"), val = tensor([1, 16, 128, 64])]; tensor var_9288 = reshape(shape = var_9287, x = query_states_121)[name = string("op_9288")]; tensor var_9293 = const()[name = string("op_9293"), val = tensor([0, 1, 3, 2])]; tensor var_9298 = const()[name = string("op_9298"), val = tensor([1, 8, 128, 64])]; tensor var_9299 = reshape(shape = var_9298, x = key_states_151)[name = string("op_9299")]; tensor var_9304 = const()[name = string("op_9304"), val = tensor([0, 1, 3, 2])]; tensor var_9309 = const()[name = string("op_9309"), val = tensor([1, 8, 128, 64])]; tensor var_9310 = reshape(shape = var_9309, x = value_states_121)[name = string("op_9310")]; tensor var_9315 = const()[name = string("op_9315"), val = tensor([0, 1, 3, 2])]; tensor mean_123_axes_0 = const()[name = string("mean_123_axes_0"), val = tensor([-1])]; bool mean_123_keep_dims_0 = const()[name = string("mean_123_keep_dims_0"), val = bool(true)]; tensor x_301 = transpose(perm = var_9293, x = var_9288)[name = string("transpose_115")]; tensor mean_123 = reduce_mean(axes = mean_123_axes_0, keep_dims = mean_123_keep_dims_0, x = x_301)[name = string("mean_123")]; tensor input_275 = sub(x = x_301, y = mean_123)[name = string("input_275")]; tensor var_9332_axes_0 = const()[name = string("op_9332_axes_0"), val = tensor([-1])]; tensor model_model_layers_15_self_attn_q_norm_weight_to_fp16 = const()[name = string("model_model_layers_15_self_attn_q_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1058535808)))]; fp16 var_9320_to_fp16 = const()[name = string("op_9320_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_9332_cast_fp16 = layer_norm(axes = var_9332_axes_0, epsilon = var_9320_to_fp16, gamma = model_model_layers_15_self_attn_q_norm_weight_to_fp16, x = input_275)[name = string("op_9332_cast_fp16")]; tensor mean_125_axes_0 = const()[name = string("mean_125_axes_0"), val = tensor([-1])]; bool mean_125_keep_dims_0 = const()[name = string("mean_125_keep_dims_0"), val = bool(true)]; tensor x_303 = transpose(perm = var_9304, x = var_9299)[name = string("transpose_114")]; tensor mean_125 = reduce_mean(axes = mean_125_axes_0, keep_dims = mean_125_keep_dims_0, x = x_303)[name = string("mean_125")]; tensor input_277 = sub(x = x_303, y = mean_125)[name = string("input_277")]; tensor var_9350_axes_0 = const()[name = string("op_9350_axes_0"), val = tensor([-1])]; tensor model_model_layers_15_self_attn_k_norm_weight_to_fp16 = const()[name = string("model_model_layers_15_self_attn_k_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1058536128)))]; fp16 var_9338_to_fp16 = const()[name = string("op_9338_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_9350_cast_fp16 = layer_norm(axes = var_9350_axes_0, epsilon = var_9338_to_fp16, gamma = model_model_layers_15_self_attn_k_norm_weight_to_fp16, x = input_277)[name = string("op_9350_cast_fp16")]; tensor var_9365 = mul(x = var_9332_cast_fp16, y = cos_5)[name = string("op_9365")]; tensor x1_61_begin_0 = const()[name = string("x1_61_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_61_end_0 = const()[name = string("x1_61_end_0"), val = tensor([1, 16, 64, 64])]; tensor x1_61_end_mask_0 = const()[name = string("x1_61_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_61 = slice_by_index(begin = x1_61_begin_0, end = x1_61_end_0, end_mask = x1_61_end_mask_0, x = var_9332_cast_fp16)[name = string("x1_61")]; tensor x2_61_begin_0 = const()[name = string("x2_61_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_61_end_0 = const()[name = string("x2_61_end_0"), val = tensor([1, 16, 64, 128])]; tensor x2_61_end_mask_0 = const()[name = string("x2_61_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_61 = slice_by_index(begin = x2_61_begin_0, end = x2_61_end_0, end_mask = x2_61_end_mask_0, x = var_9332_cast_fp16)[name = string("x2_61")]; fp16 const_337_promoted = const()[name = string("const_337_promoted"), val = fp16(-0x1p+0)]; tensor var_9386 = mul(x = x2_61, y = const_337_promoted)[name = string("op_9386")]; int32 var_9388 = const()[name = string("op_9388"), val = int32(-1)]; bool var_9389_interleave_0 = const()[name = string("op_9389_interleave_0"), val = bool(false)]; tensor var_9389 = concat(axis = var_9388, interleave = var_9389_interleave_0, values = (var_9386, x1_61))[name = string("op_9389")]; tensor var_9390 = mul(x = var_9389, y = sin_5)[name = string("op_9390")]; tensor query_states_123 = add(x = var_9365, y = var_9390)[name = string("query_states_123")]; tensor var_9393 = mul(x = var_9350_cast_fp16, y = cos_5)[name = string("op_9393")]; tensor x1_63_begin_0 = const()[name = string("x1_63_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_63_end_0 = const()[name = string("x1_63_end_0"), val = tensor([1, 8, 64, 64])]; tensor x1_63_end_mask_0 = const()[name = string("x1_63_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_63 = slice_by_index(begin = x1_63_begin_0, end = x1_63_end_0, end_mask = x1_63_end_mask_0, x = var_9350_cast_fp16)[name = string("x1_63")]; tensor x2_63_begin_0 = const()[name = string("x2_63_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_63_end_0 = const()[name = string("x2_63_end_0"), val = tensor([1, 8, 64, 128])]; tensor x2_63_end_mask_0 = const()[name = string("x2_63_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_63 = slice_by_index(begin = x2_63_begin_0, end = x2_63_end_0, end_mask = x2_63_end_mask_0, x = var_9350_cast_fp16)[name = string("x2_63")]; fp16 const_340_promoted = const()[name = string("const_340_promoted"), val = fp16(-0x1p+0)]; tensor var_9414 = mul(x = x2_63, y = const_340_promoted)[name = string("op_9414")]; int32 var_9416 = const()[name = string("op_9416"), val = int32(-1)]; bool var_9417_interleave_0 = const()[name = string("op_9417_interleave_0"), val = bool(false)]; tensor var_9417 = concat(axis = var_9416, interleave = var_9417_interleave_0, values = (var_9414, x1_63))[name = string("op_9417")]; tensor var_9418 = mul(x = var_9417, y = sin_5)[name = string("op_9418")]; tensor key_states_153 = add(x = var_9393, y = var_9418)[name = string("key_states_153")]; tensor expand_dims_180 = const()[name = string("expand_dims_180"), val = tensor([15])]; tensor expand_dims_181 = const()[name = string("expand_dims_181"), val = tensor([0])]; tensor expand_dims_183 = const()[name = string("expand_dims_183"), val = tensor([0])]; tensor expand_dims_184 = const()[name = string("expand_dims_184"), val = tensor([16])]; int32 concat_272_axis_0 = const()[name = string("concat_272_axis_0"), val = int32(0)]; bool concat_272_interleave_0 = const()[name = string("concat_272_interleave_0"), val = bool(false)]; tensor concat_272 = concat(axis = concat_272_axis_0, interleave = concat_272_interleave_0, values = (expand_dims_180, expand_dims_181, current_pos, expand_dims_183))[name = string("concat_272")]; tensor concat_273_values1_0 = const()[name = string("concat_273_values1_0"), val = tensor([0])]; tensor concat_273_values3_0 = const()[name = string("concat_273_values3_0"), val = tensor([0])]; int32 concat_273_axis_0 = const()[name = string("concat_273_axis_0"), val = int32(0)]; bool concat_273_interleave_0 = const()[name = string("concat_273_interleave_0"), val = bool(false)]; tensor concat_273 = concat(axis = concat_273_axis_0, interleave = concat_273_interleave_0, values = (expand_dims_184, concat_273_values1_0, var_1760, concat_273_values3_0))[name = string("concat_273")]; tensor model_model_kv_cache_0_internal_tensor_assign_31_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_31_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_31_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_31_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_31_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_31_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_31_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_31_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_31_cast_fp16 = slice_update(begin = concat_272, begin_mask = model_model_kv_cache_0_internal_tensor_assign_31_begin_mask_0, end = concat_273, end_mask = model_model_kv_cache_0_internal_tensor_assign_31_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_31_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_31_stride_0, update = key_states_153, x = coreml_update_state_85)[name = string("model_model_kv_cache_0_internal_tensor_assign_31_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_31_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_86_write_state")]; tensor coreml_update_state_86 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_86")]; tensor expand_dims_186 = const()[name = string("expand_dims_186"), val = tensor([43])]; tensor expand_dims_187 = const()[name = string("expand_dims_187"), val = tensor([0])]; tensor expand_dims_189 = const()[name = string("expand_dims_189"), val = tensor([0])]; tensor expand_dims_190 = const()[name = string("expand_dims_190"), val = tensor([44])]; int32 concat_276_axis_0 = const()[name = string("concat_276_axis_0"), val = int32(0)]; bool concat_276_interleave_0 = const()[name = string("concat_276_interleave_0"), val = bool(false)]; tensor concat_276 = concat(axis = concat_276_axis_0, interleave = concat_276_interleave_0, values = (expand_dims_186, expand_dims_187, current_pos, expand_dims_189))[name = string("concat_276")]; tensor concat_277_values1_0 = const()[name = string("concat_277_values1_0"), val = tensor([0])]; tensor concat_277_values3_0 = const()[name = string("concat_277_values3_0"), val = tensor([0])]; int32 concat_277_axis_0 = const()[name = string("concat_277_axis_0"), val = int32(0)]; bool concat_277_interleave_0 = const()[name = string("concat_277_interleave_0"), val = bool(false)]; tensor concat_277 = concat(axis = concat_277_axis_0, interleave = concat_277_interleave_0, values = (expand_dims_190, concat_277_values1_0, var_1760, concat_277_values3_0))[name = string("concat_277")]; tensor model_model_kv_cache_0_internal_tensor_assign_32_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_32_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_32_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_32_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_32_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_32_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_32_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_32_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_123 = transpose(perm = var_9315, x = var_9310)[name = string("transpose_113")]; tensor model_model_kv_cache_0_internal_tensor_assign_32_cast_fp16 = slice_update(begin = concat_276, begin_mask = model_model_kv_cache_0_internal_tensor_assign_32_begin_mask_0, end = concat_277, end_mask = model_model_kv_cache_0_internal_tensor_assign_32_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_32_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_32_stride_0, update = value_states_123, x = coreml_update_state_86)[name = string("model_model_kv_cache_0_internal_tensor_assign_32_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_32_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_87_write_state")]; tensor coreml_update_state_87 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_87")]; tensor var_9489_begin_0 = const()[name = string("op_9489_begin_0"), val = tensor([15, 0, 0, 0])]; tensor var_9489_end_0 = const()[name = string("op_9489_end_0"), val = tensor([16, 8, 1024, 128])]; tensor var_9489_end_mask_0 = const()[name = string("op_9489_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_9489_cast_fp16 = slice_by_index(begin = var_9489_begin_0, end = var_9489_end_0, end_mask = var_9489_end_mask_0, x = coreml_update_state_87)[name = string("op_9489_cast_fp16")]; tensor K_layer_cache_31_axes_0 = const()[name = string("K_layer_cache_31_axes_0"), val = tensor([0])]; tensor K_layer_cache_31_cast_fp16 = squeeze(axes = K_layer_cache_31_axes_0, x = var_9489_cast_fp16)[name = string("K_layer_cache_31_cast_fp16")]; tensor var_9496_begin_0 = const()[name = string("op_9496_begin_0"), val = tensor([43, 0, 0, 0])]; tensor var_9496_end_0 = const()[name = string("op_9496_end_0"), val = tensor([44, 8, 1024, 128])]; tensor var_9496_end_mask_0 = const()[name = string("op_9496_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_9496_cast_fp16 = slice_by_index(begin = var_9496_begin_0, end = var_9496_end_0, end_mask = var_9496_end_mask_0, x = coreml_update_state_87)[name = string("op_9496_cast_fp16")]; tensor V_layer_cache_31_axes_0 = const()[name = string("V_layer_cache_31_axes_0"), val = tensor([0])]; tensor V_layer_cache_31_cast_fp16 = squeeze(axes = V_layer_cache_31_axes_0, x = var_9496_cast_fp16)[name = string("V_layer_cache_31_cast_fp16")]; tensor x_307_axes_0 = const()[name = string("x_307_axes_0"), val = tensor([1])]; tensor x_307_cast_fp16 = expand_dims(axes = x_307_axes_0, x = K_layer_cache_31_cast_fp16)[name = string("x_307_cast_fp16")]; tensor var_9525 = const()[name = string("op_9525"), val = tensor([1, 2, 1, 1])]; tensor x_309_cast_fp16 = tile(reps = var_9525, x = x_307_cast_fp16)[name = string("x_309_cast_fp16")]; tensor var_9537 = const()[name = string("op_9537"), val = tensor([1, -1, 1024, 128])]; tensor key_states_157_cast_fp16 = reshape(shape = var_9537, x = x_309_cast_fp16)[name = string("key_states_157_cast_fp16")]; tensor x_313_axes_0 = const()[name = string("x_313_axes_0"), val = tensor([1])]; tensor x_313_cast_fp16 = expand_dims(axes = x_313_axes_0, x = V_layer_cache_31_cast_fp16)[name = string("x_313_cast_fp16")]; tensor var_9545 = const()[name = string("op_9545"), val = tensor([1, 2, 1, 1])]; tensor x_315_cast_fp16 = tile(reps = var_9545, x = x_313_cast_fp16)[name = string("x_315_cast_fp16")]; bool var_9572_transpose_x_0 = const()[name = string("op_9572_transpose_x_0"), val = bool(false)]; bool var_9572_transpose_y_0 = const()[name = string("op_9572_transpose_y_0"), val = bool(true)]; tensor var_9572 = matmul(transpose_x = var_9572_transpose_x_0, transpose_y = var_9572_transpose_y_0, x = query_states_123, y = key_states_157_cast_fp16)[name = string("op_9572")]; fp16 var_9573_to_fp16 = const()[name = string("op_9573_to_fp16"), val = fp16(0x1.6ap-4)]; tensor attn_weights_61_cast_fp16 = mul(x = var_9572, y = var_9573_to_fp16)[name = string("attn_weights_61_cast_fp16")]; tensor attn_weights_63_cast_fp16 = add(x = attn_weights_61_cast_fp16, y = causal_mask)[name = string("attn_weights_63_cast_fp16")]; int32 var_9608 = const()[name = string("op_9608"), val = int32(-1)]; tensor var_9610_cast_fp16 = softmax(axis = var_9608, x = attn_weights_63_cast_fp16)[name = string("op_9610_cast_fp16")]; tensor concat_282 = const()[name = string("concat_282"), val = tensor([16, 64, 1024])]; tensor reshape_45_cast_fp16 = reshape(shape = concat_282, x = var_9610_cast_fp16)[name = string("reshape_45_cast_fp16")]; tensor concat_283 = const()[name = string("concat_283"), val = tensor([16, 1024, 128])]; tensor reshape_46_cast_fp16 = reshape(shape = concat_283, x = x_315_cast_fp16)[name = string("reshape_46_cast_fp16")]; bool matmul_15_transpose_x_0 = const()[name = string("matmul_15_transpose_x_0"), val = bool(false)]; bool matmul_15_transpose_y_0 = const()[name = string("matmul_15_transpose_y_0"), val = bool(false)]; tensor matmul_15_cast_fp16 = matmul(transpose_x = matmul_15_transpose_x_0, transpose_y = matmul_15_transpose_y_0, x = reshape_45_cast_fp16, y = reshape_46_cast_fp16)[name = string("matmul_15_cast_fp16")]; tensor concat_287 = const()[name = string("concat_287"), val = tensor([1, 16, 64, 128])]; tensor reshape_47_cast_fp16 = reshape(shape = concat_287, x = matmul_15_cast_fp16)[name = string("reshape_47_cast_fp16")]; tensor var_9622_perm_0 = const()[name = string("op_9622_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_9641 = const()[name = string("op_9641"), val = tensor([1, 64, 2048])]; tensor var_9622_cast_fp16 = transpose(perm = var_9622_perm_0, x = reshape_47_cast_fp16)[name = string("transpose_112")]; tensor attn_output_155_cast_fp16 = reshape(shape = var_9641, x = var_9622_cast_fp16)[name = string("attn_output_155_cast_fp16")]; tensor var_9646 = const()[name = string("op_9646"), val = tensor([0, 2, 1])]; string var_9662_pad_type_0 = const()[name = string("op_9662_pad_type_0"), val = string("valid")]; int32 var_9662_groups_0 = const()[name = string("op_9662_groups_0"), val = int32(1)]; tensor var_9662_strides_0 = const()[name = string("op_9662_strides_0"), val = tensor([1])]; tensor var_9662_pad_0 = const()[name = string("op_9662_pad_0"), val = tensor([0, 0])]; tensor var_9662_dilations_0 = const()[name = string("op_9662_dilations_0"), val = tensor([1])]; tensor squeeze_15_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1058536448))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1061682240))))[name = string("squeeze_15_cast_fp16_to_fp32_to_fp16_palettized")]; tensor var_9647_cast_fp16 = transpose(perm = var_9646, x = attn_output_155_cast_fp16)[name = string("transpose_111")]; tensor var_9662_cast_fp16 = conv(dilations = var_9662_dilations_0, groups = var_9662_groups_0, pad = var_9662_pad_0, pad_type = var_9662_pad_type_0, strides = var_9662_strides_0, weight = squeeze_15_cast_fp16_to_fp32_to_fp16_palettized, x = var_9647_cast_fp16)[name = string("op_9662_cast_fp16")]; tensor var_9666 = const()[name = string("op_9666"), val = tensor([0, 2, 1])]; tensor attn_output_159_cast_fp16 = transpose(perm = var_9666, x = var_9662_cast_fp16)[name = string("transpose_110")]; tensor hidden_states_95_cast_fp16 = add(x = hidden_states_91_cast_fp16, y = attn_output_159_cast_fp16)[name = string("hidden_states_95_cast_fp16")]; tensor mean_127_axes_0 = const()[name = string("mean_127_axes_0"), val = tensor([-1])]; bool mean_127_keep_dims_0 = const()[name = string("mean_127_keep_dims_0"), val = bool(true)]; tensor mean_127_cast_fp16 = reduce_mean(axes = mean_127_axes_0, keep_dims = mean_127_keep_dims_0, x = hidden_states_95_cast_fp16)[name = string("mean_127_cast_fp16")]; tensor input_281_cast_fp16 = sub(x = hidden_states_95_cast_fp16, y = mean_127_cast_fp16)[name = string("input_281_cast_fp16")]; tensor var_9685_axes_0 = const()[name = string("op_9685_axes_0"), val = tensor([-1])]; tensor model_model_layers_15_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_15_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1061715072)))]; fp16 var_9673_to_fp16 = const()[name = string("op_9673_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_9685_cast_fp16 = layer_norm(axes = var_9685_axes_0, epsilon = var_9673_to_fp16, gamma = model_model_layers_15_post_attention_layernorm_weight_to_fp16, x = input_281_cast_fp16)[name = string("op_9685_cast_fp16")]; tensor var_9699 = const()[name = string("op_9699"), val = tensor([0, 2, 1])]; tensor input_283_axes_0 = const()[name = string("input_283_axes_0"), val = tensor([2])]; tensor var_9700 = transpose(perm = var_9699, x = var_9685_cast_fp16)[name = string("transpose_109")]; tensor input_283 = expand_dims(axes = input_283_axes_0, x = var_9700)[name = string("input_283")]; string input_285_pad_type_0 = const()[name = string("input_285_pad_type_0"), val = string("valid")]; tensor input_285_strides_0 = const()[name = string("input_285_strides_0"), val = tensor([1, 1])]; tensor input_285_pad_0 = const()[name = string("input_285_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_285_dilations_0 = const()[name = string("input_285_dilations_0"), val = tensor([1, 1])]; int32 input_285_groups_0 = const()[name = string("input_285_groups_0"), val = int32(1)]; tensor input_285 = conv(dilations = input_285_dilations_0, groups = input_285_groups_0, pad = input_285_pad_0, pad_type = input_285_pad_type_0, strides = input_285_strides_0, weight = model_model_layers_15_mlp_gate_proj_weight_palettized, x = input_283)[name = string("input_285")]; string b_31_pad_type_0 = const()[name = string("b_31_pad_type_0"), val = string("valid")]; tensor b_31_strides_0 = const()[name = string("b_31_strides_0"), val = tensor([1, 1])]; tensor b_31_pad_0 = const()[name = string("b_31_pad_0"), val = tensor([0, 0, 0, 0])]; tensor b_31_dilations_0 = const()[name = string("b_31_dilations_0"), val = tensor([1, 1])]; int32 b_31_groups_0 = const()[name = string("b_31_groups_0"), val = int32(1)]; tensor b_31 = conv(dilations = b_31_dilations_0, groups = b_31_groups_0, pad = b_31_pad_0, pad_type = b_31_pad_type_0, strides = b_31_strides_0, weight = model_model_layers_15_mlp_up_proj_weight_palettized, x = input_283)[name = string("b_31")]; tensor c_31 = silu(x = input_285)[name = string("c_31")]; tensor input_287 = mul(x = c_31, y = b_31)[name = string("input_287")]; string e_31_pad_type_0 = const()[name = string("e_31_pad_type_0"), val = string("valid")]; tensor e_31_strides_0 = const()[name = string("e_31_strides_0"), val = tensor([1, 1])]; tensor e_31_pad_0 = const()[name = string("e_31_pad_0"), val = tensor([0, 0, 0, 0])]; tensor e_31_dilations_0 = const()[name = string("e_31_dilations_0"), val = tensor([1, 1])]; int32 e_31_groups_0 = const()[name = string("e_31_groups_0"), val = int32(1)]; tensor e_31 = conv(dilations = e_31_dilations_0, groups = e_31_groups_0, pad = e_31_pad_0, pad_type = e_31_pad_type_0, strides = e_31_strides_0, weight = model_model_layers_15_mlp_down_proj_weight_palettized, x = input_287)[name = string("e_31")]; tensor var_9722_axes_0 = const()[name = string("op_9722_axes_0"), val = tensor([2])]; tensor var_9722 = squeeze(axes = var_9722_axes_0, x = e_31)[name = string("op_9722")]; tensor var_9723 = const()[name = string("op_9723"), val = tensor([0, 2, 1])]; tensor var_9724 = transpose(perm = var_9723, x = var_9722)[name = string("transpose_108")]; tensor hidden_states_97_cast_fp16 = add(x = hidden_states_95_cast_fp16, y = var_9724)[name = string("hidden_states_97_cast_fp16")]; tensor mean_129_axes_0 = const()[name = string("mean_129_axes_0"), val = tensor([-1])]; bool mean_129_keep_dims_0 = const()[name = string("mean_129_keep_dims_0"), val = bool(true)]; tensor mean_129_cast_fp16 = reduce_mean(axes = mean_129_axes_0, keep_dims = mean_129_keep_dims_0, x = hidden_states_97_cast_fp16)[name = string("mean_129_cast_fp16")]; tensor input_289_cast_fp16 = sub(x = hidden_states_97_cast_fp16, y = mean_129_cast_fp16)[name = string("input_289_cast_fp16")]; tensor var_9742_axes_0 = const()[name = string("op_9742_axes_0"), val = tensor([-1])]; tensor model_model_layers_16_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_16_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1061719232)))]; fp16 var_9730_to_fp16 = const()[name = string("op_9730_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_9742_cast_fp16 = layer_norm(axes = var_9742_axes_0, epsilon = var_9730_to_fp16, gamma = model_model_layers_16_input_layernorm_weight_to_fp16, x = input_289_cast_fp16)[name = string("op_9742_cast_fp16")]; tensor var_9754 = const()[name = string("op_9754"), val = tensor([0, 2, 1])]; tensor var_9757_axes_0 = const()[name = string("op_9757_axes_0"), val = tensor([2])]; tensor var_9755 = transpose(perm = var_9754, x = var_9742_cast_fp16)[name = string("transpose_107")]; tensor var_9757 = expand_dims(axes = var_9757_axes_0, x = var_9755)[name = string("op_9757")]; string query_states_129_pad_type_0 = const()[name = string("query_states_129_pad_type_0"), val = string("valid")]; tensor query_states_129_strides_0 = const()[name = string("query_states_129_strides_0"), val = tensor([1, 1])]; tensor query_states_129_pad_0 = const()[name = string("query_states_129_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_129_dilations_0 = const()[name = string("query_states_129_dilations_0"), val = tensor([1, 1])]; int32 query_states_129_groups_0 = const()[name = string("query_states_129_groups_0"), val = int32(1)]; tensor query_states_129 = conv(dilations = query_states_129_dilations_0, groups = query_states_129_groups_0, pad = query_states_129_pad_0, pad_type = query_states_129_pad_type_0, strides = query_states_129_strides_0, weight = model_model_layers_16_self_attn_q_proj_weight_palettized, x = var_9757)[name = string("query_states_129")]; string key_states_161_pad_type_0 = const()[name = string("key_states_161_pad_type_0"), val = string("valid")]; tensor key_states_161_strides_0 = const()[name = string("key_states_161_strides_0"), val = tensor([1, 1])]; tensor key_states_161_pad_0 = const()[name = string("key_states_161_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_161_dilations_0 = const()[name = string("key_states_161_dilations_0"), val = tensor([1, 1])]; int32 key_states_161_groups_0 = const()[name = string("key_states_161_groups_0"), val = int32(1)]; tensor key_states_161 = conv(dilations = key_states_161_dilations_0, groups = key_states_161_groups_0, pad = key_states_161_pad_0, pad_type = key_states_161_pad_type_0, strides = key_states_161_strides_0, weight = model_model_layers_16_self_attn_k_proj_weight_palettized, x = var_9757)[name = string("key_states_161")]; string value_states_129_pad_type_0 = const()[name = string("value_states_129_pad_type_0"), val = string("valid")]; tensor value_states_129_strides_0 = const()[name = string("value_states_129_strides_0"), val = tensor([1, 1])]; tensor value_states_129_pad_0 = const()[name = string("value_states_129_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_129_dilations_0 = const()[name = string("value_states_129_dilations_0"), val = tensor([1, 1])]; int32 value_states_129_groups_0 = const()[name = string("value_states_129_groups_0"), val = int32(1)]; tensor value_states_129 = conv(dilations = value_states_129_dilations_0, groups = value_states_129_groups_0, pad = value_states_129_pad_0, pad_type = value_states_129_pad_type_0, strides = value_states_129_strides_0, weight = model_model_layers_16_self_attn_v_proj_weight_palettized, x = var_9757)[name = string("value_states_129")]; tensor var_9799 = const()[name = string("op_9799"), val = tensor([1, 16, 128, 64])]; tensor var_9800 = reshape(shape = var_9799, x = query_states_129)[name = string("op_9800")]; tensor var_9805 = const()[name = string("op_9805"), val = tensor([0, 1, 3, 2])]; tensor var_9810 = const()[name = string("op_9810"), val = tensor([1, 8, 128, 64])]; tensor var_9811 = reshape(shape = var_9810, x = key_states_161)[name = string("op_9811")]; tensor var_9816 = const()[name = string("op_9816"), val = tensor([0, 1, 3, 2])]; tensor var_9821 = const()[name = string("op_9821"), val = tensor([1, 8, 128, 64])]; tensor var_9822 = reshape(shape = var_9821, x = value_states_129)[name = string("op_9822")]; tensor var_9827 = const()[name = string("op_9827"), val = tensor([0, 1, 3, 2])]; tensor mean_131_axes_0 = const()[name = string("mean_131_axes_0"), val = tensor([-1])]; bool mean_131_keep_dims_0 = const()[name = string("mean_131_keep_dims_0"), val = bool(true)]; tensor x_321 = transpose(perm = var_9805, x = var_9800)[name = string("transpose_106")]; tensor mean_131 = reduce_mean(axes = mean_131_axes_0, keep_dims = mean_131_keep_dims_0, x = x_321)[name = string("mean_131")]; tensor input_293 = sub(x = x_321, y = mean_131)[name = string("input_293")]; tensor var_9844_axes_0 = const()[name = string("op_9844_axes_0"), val = tensor([-1])]; tensor model_model_layers_16_self_attn_q_norm_weight_to_fp16 = const()[name = string("model_model_layers_16_self_attn_q_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1061723392)))]; fp16 var_9832_to_fp16 = const()[name = string("op_9832_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_9844_cast_fp16 = layer_norm(axes = var_9844_axes_0, epsilon = var_9832_to_fp16, gamma = model_model_layers_16_self_attn_q_norm_weight_to_fp16, x = input_293)[name = string("op_9844_cast_fp16")]; tensor mean_133_axes_0 = const()[name = string("mean_133_axes_0"), val = tensor([-1])]; bool mean_133_keep_dims_0 = const()[name = string("mean_133_keep_dims_0"), val = bool(true)]; tensor x_323 = transpose(perm = var_9816, x = var_9811)[name = string("transpose_105")]; tensor mean_133 = reduce_mean(axes = mean_133_axes_0, keep_dims = mean_133_keep_dims_0, x = x_323)[name = string("mean_133")]; tensor input_295 = sub(x = x_323, y = mean_133)[name = string("input_295")]; tensor var_9862_axes_0 = const()[name = string("op_9862_axes_0"), val = tensor([-1])]; tensor model_model_layers_16_self_attn_k_norm_weight_to_fp16 = const()[name = string("model_model_layers_16_self_attn_k_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1061723712)))]; fp16 var_9850_to_fp16 = const()[name = string("op_9850_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_9862_cast_fp16 = layer_norm(axes = var_9862_axes_0, epsilon = var_9850_to_fp16, gamma = model_model_layers_16_self_attn_k_norm_weight_to_fp16, x = input_295)[name = string("op_9862_cast_fp16")]; tensor var_9877 = mul(x = var_9844_cast_fp16, y = cos_5)[name = string("op_9877")]; tensor x1_65_begin_0 = const()[name = string("x1_65_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_65_end_0 = const()[name = string("x1_65_end_0"), val = tensor([1, 16, 64, 64])]; tensor x1_65_end_mask_0 = const()[name = string("x1_65_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_65 = slice_by_index(begin = x1_65_begin_0, end = x1_65_end_0, end_mask = x1_65_end_mask_0, x = var_9844_cast_fp16)[name = string("x1_65")]; tensor x2_65_begin_0 = const()[name = string("x2_65_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_65_end_0 = const()[name = string("x2_65_end_0"), val = tensor([1, 16, 64, 128])]; tensor x2_65_end_mask_0 = const()[name = string("x2_65_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_65 = slice_by_index(begin = x2_65_begin_0, end = x2_65_end_0, end_mask = x2_65_end_mask_0, x = var_9844_cast_fp16)[name = string("x2_65")]; fp16 const_359_promoted = const()[name = string("const_359_promoted"), val = fp16(-0x1p+0)]; tensor var_9898 = mul(x = x2_65, y = const_359_promoted)[name = string("op_9898")]; int32 var_9900 = const()[name = string("op_9900"), val = int32(-1)]; bool var_9901_interleave_0 = const()[name = string("op_9901_interleave_0"), val = bool(false)]; tensor var_9901 = concat(axis = var_9900, interleave = var_9901_interleave_0, values = (var_9898, x1_65))[name = string("op_9901")]; tensor var_9902 = mul(x = var_9901, y = sin_5)[name = string("op_9902")]; tensor query_states_131 = add(x = var_9877, y = var_9902)[name = string("query_states_131")]; tensor var_9905 = mul(x = var_9862_cast_fp16, y = cos_5)[name = string("op_9905")]; tensor x1_67_begin_0 = const()[name = string("x1_67_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_67_end_0 = const()[name = string("x1_67_end_0"), val = tensor([1, 8, 64, 64])]; tensor x1_67_end_mask_0 = const()[name = string("x1_67_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_67 = slice_by_index(begin = x1_67_begin_0, end = x1_67_end_0, end_mask = x1_67_end_mask_0, x = var_9862_cast_fp16)[name = string("x1_67")]; tensor x2_67_begin_0 = const()[name = string("x2_67_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_67_end_0 = const()[name = string("x2_67_end_0"), val = tensor([1, 8, 64, 128])]; tensor x2_67_end_mask_0 = const()[name = string("x2_67_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_67 = slice_by_index(begin = x2_67_begin_0, end = x2_67_end_0, end_mask = x2_67_end_mask_0, x = var_9862_cast_fp16)[name = string("x2_67")]; fp16 const_362_promoted = const()[name = string("const_362_promoted"), val = fp16(-0x1p+0)]; tensor var_9926 = mul(x = x2_67, y = const_362_promoted)[name = string("op_9926")]; int32 var_9928 = const()[name = string("op_9928"), val = int32(-1)]; bool var_9929_interleave_0 = const()[name = string("op_9929_interleave_0"), val = bool(false)]; tensor var_9929 = concat(axis = var_9928, interleave = var_9929_interleave_0, values = (var_9926, x1_67))[name = string("op_9929")]; tensor var_9930 = mul(x = var_9929, y = sin_5)[name = string("op_9930")]; tensor key_states_163 = add(x = var_9905, y = var_9930)[name = string("key_states_163")]; tensor expand_dims_192 = const()[name = string("expand_dims_192"), val = tensor([16])]; tensor expand_dims_193 = const()[name = string("expand_dims_193"), val = tensor([0])]; tensor expand_dims_195 = const()[name = string("expand_dims_195"), val = tensor([0])]; tensor expand_dims_196 = const()[name = string("expand_dims_196"), val = tensor([17])]; int32 concat_290_axis_0 = const()[name = string("concat_290_axis_0"), val = int32(0)]; bool concat_290_interleave_0 = const()[name = string("concat_290_interleave_0"), val = bool(false)]; tensor concat_290 = concat(axis = concat_290_axis_0, interleave = concat_290_interleave_0, values = (expand_dims_192, expand_dims_193, current_pos, expand_dims_195))[name = string("concat_290")]; tensor concat_291_values1_0 = const()[name = string("concat_291_values1_0"), val = tensor([0])]; tensor concat_291_values3_0 = const()[name = string("concat_291_values3_0"), val = tensor([0])]; int32 concat_291_axis_0 = const()[name = string("concat_291_axis_0"), val = int32(0)]; bool concat_291_interleave_0 = const()[name = string("concat_291_interleave_0"), val = bool(false)]; tensor concat_291 = concat(axis = concat_291_axis_0, interleave = concat_291_interleave_0, values = (expand_dims_196, concat_291_values1_0, var_1760, concat_291_values3_0))[name = string("concat_291")]; tensor model_model_kv_cache_0_internal_tensor_assign_33_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_33_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_33_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_33_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_33_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_33_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_33_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_33_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_33_cast_fp16 = slice_update(begin = concat_290, begin_mask = model_model_kv_cache_0_internal_tensor_assign_33_begin_mask_0, end = concat_291, end_mask = model_model_kv_cache_0_internal_tensor_assign_33_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_33_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_33_stride_0, update = key_states_163, x = coreml_update_state_87)[name = string("model_model_kv_cache_0_internal_tensor_assign_33_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_33_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_88_write_state")]; tensor coreml_update_state_88 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_88")]; tensor expand_dims_198 = const()[name = string("expand_dims_198"), val = tensor([44])]; tensor expand_dims_199 = const()[name = string("expand_dims_199"), val = tensor([0])]; tensor expand_dims_201 = const()[name = string("expand_dims_201"), val = tensor([0])]; tensor expand_dims_202 = const()[name = string("expand_dims_202"), val = tensor([45])]; int32 concat_294_axis_0 = const()[name = string("concat_294_axis_0"), val = int32(0)]; bool concat_294_interleave_0 = const()[name = string("concat_294_interleave_0"), val = bool(false)]; tensor concat_294 = concat(axis = concat_294_axis_0, interleave = concat_294_interleave_0, values = (expand_dims_198, expand_dims_199, current_pos, expand_dims_201))[name = string("concat_294")]; tensor concat_295_values1_0 = const()[name = string("concat_295_values1_0"), val = tensor([0])]; tensor concat_295_values3_0 = const()[name = string("concat_295_values3_0"), val = tensor([0])]; int32 concat_295_axis_0 = const()[name = string("concat_295_axis_0"), val = int32(0)]; bool concat_295_interleave_0 = const()[name = string("concat_295_interleave_0"), val = bool(false)]; tensor concat_295 = concat(axis = concat_295_axis_0, interleave = concat_295_interleave_0, values = (expand_dims_202, concat_295_values1_0, var_1760, concat_295_values3_0))[name = string("concat_295")]; tensor model_model_kv_cache_0_internal_tensor_assign_34_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_34_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_34_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_34_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_34_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_34_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_34_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_34_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_131 = transpose(perm = var_9827, x = var_9822)[name = string("transpose_104")]; tensor model_model_kv_cache_0_internal_tensor_assign_34_cast_fp16 = slice_update(begin = concat_294, begin_mask = model_model_kv_cache_0_internal_tensor_assign_34_begin_mask_0, end = concat_295, end_mask = model_model_kv_cache_0_internal_tensor_assign_34_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_34_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_34_stride_0, update = value_states_131, x = coreml_update_state_88)[name = string("model_model_kv_cache_0_internal_tensor_assign_34_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_34_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_89_write_state")]; tensor coreml_update_state_89 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_89")]; tensor var_10001_begin_0 = const()[name = string("op_10001_begin_0"), val = tensor([16, 0, 0, 0])]; tensor var_10001_end_0 = const()[name = string("op_10001_end_0"), val = tensor([17, 8, 1024, 128])]; tensor var_10001_end_mask_0 = const()[name = string("op_10001_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_10001_cast_fp16 = slice_by_index(begin = var_10001_begin_0, end = var_10001_end_0, end_mask = var_10001_end_mask_0, x = coreml_update_state_89)[name = string("op_10001_cast_fp16")]; tensor K_layer_cache_33_axes_0 = const()[name = string("K_layer_cache_33_axes_0"), val = tensor([0])]; tensor K_layer_cache_33_cast_fp16 = squeeze(axes = K_layer_cache_33_axes_0, x = var_10001_cast_fp16)[name = string("K_layer_cache_33_cast_fp16")]; tensor var_10008_begin_0 = const()[name = string("op_10008_begin_0"), val = tensor([44, 0, 0, 0])]; tensor var_10008_end_0 = const()[name = string("op_10008_end_0"), val = tensor([45, 8, 1024, 128])]; tensor var_10008_end_mask_0 = const()[name = string("op_10008_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_10008_cast_fp16 = slice_by_index(begin = var_10008_begin_0, end = var_10008_end_0, end_mask = var_10008_end_mask_0, x = coreml_update_state_89)[name = string("op_10008_cast_fp16")]; tensor V_layer_cache_33_axes_0 = const()[name = string("V_layer_cache_33_axes_0"), val = tensor([0])]; tensor V_layer_cache_33_cast_fp16 = squeeze(axes = V_layer_cache_33_axes_0, x = var_10008_cast_fp16)[name = string("V_layer_cache_33_cast_fp16")]; tensor x_327_axes_0 = const()[name = string("x_327_axes_0"), val = tensor([1])]; tensor x_327_cast_fp16 = expand_dims(axes = x_327_axes_0, x = K_layer_cache_33_cast_fp16)[name = string("x_327_cast_fp16")]; tensor var_10037 = const()[name = string("op_10037"), val = tensor([1, 2, 1, 1])]; tensor x_329_cast_fp16 = tile(reps = var_10037, x = x_327_cast_fp16)[name = string("x_329_cast_fp16")]; tensor var_10049 = const()[name = string("op_10049"), val = tensor([1, -1, 1024, 128])]; tensor key_states_167_cast_fp16 = reshape(shape = var_10049, x = x_329_cast_fp16)[name = string("key_states_167_cast_fp16")]; tensor x_333_axes_0 = const()[name = string("x_333_axes_0"), val = tensor([1])]; tensor x_333_cast_fp16 = expand_dims(axes = x_333_axes_0, x = V_layer_cache_33_cast_fp16)[name = string("x_333_cast_fp16")]; tensor var_10057 = const()[name = string("op_10057"), val = tensor([1, 2, 1, 1])]; tensor x_335_cast_fp16 = tile(reps = var_10057, x = x_333_cast_fp16)[name = string("x_335_cast_fp16")]; bool var_10084_transpose_x_0 = const()[name = string("op_10084_transpose_x_0"), val = bool(false)]; bool var_10084_transpose_y_0 = const()[name = string("op_10084_transpose_y_0"), val = bool(true)]; tensor var_10084 = matmul(transpose_x = var_10084_transpose_x_0, transpose_y = var_10084_transpose_y_0, x = query_states_131, y = key_states_167_cast_fp16)[name = string("op_10084")]; fp16 var_10085_to_fp16 = const()[name = string("op_10085_to_fp16"), val = fp16(0x1.6ap-4)]; tensor attn_weights_65_cast_fp16 = mul(x = var_10084, y = var_10085_to_fp16)[name = string("attn_weights_65_cast_fp16")]; tensor attn_weights_67_cast_fp16 = add(x = attn_weights_65_cast_fp16, y = causal_mask)[name = string("attn_weights_67_cast_fp16")]; int32 var_10120 = const()[name = string("op_10120"), val = int32(-1)]; tensor var_10122_cast_fp16 = softmax(axis = var_10120, x = attn_weights_67_cast_fp16)[name = string("op_10122_cast_fp16")]; tensor concat_300 = const()[name = string("concat_300"), val = tensor([16, 64, 1024])]; tensor reshape_48_cast_fp16 = reshape(shape = concat_300, x = var_10122_cast_fp16)[name = string("reshape_48_cast_fp16")]; tensor concat_301 = const()[name = string("concat_301"), val = tensor([16, 1024, 128])]; tensor reshape_49_cast_fp16 = reshape(shape = concat_301, x = x_335_cast_fp16)[name = string("reshape_49_cast_fp16")]; bool matmul_16_transpose_x_0 = const()[name = string("matmul_16_transpose_x_0"), val = bool(false)]; bool matmul_16_transpose_y_0 = const()[name = string("matmul_16_transpose_y_0"), val = bool(false)]; tensor matmul_16_cast_fp16 = matmul(transpose_x = matmul_16_transpose_x_0, transpose_y = matmul_16_transpose_y_0, x = reshape_48_cast_fp16, y = reshape_49_cast_fp16)[name = string("matmul_16_cast_fp16")]; tensor concat_305 = const()[name = string("concat_305"), val = tensor([1, 16, 64, 128])]; tensor reshape_50_cast_fp16 = reshape(shape = concat_305, x = matmul_16_cast_fp16)[name = string("reshape_50_cast_fp16")]; tensor var_10134_perm_0 = const()[name = string("op_10134_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_10153 = const()[name = string("op_10153"), val = tensor([1, 64, 2048])]; tensor var_10134_cast_fp16 = transpose(perm = var_10134_perm_0, x = reshape_50_cast_fp16)[name = string("transpose_103")]; tensor attn_output_165_cast_fp16 = reshape(shape = var_10153, x = var_10134_cast_fp16)[name = string("attn_output_165_cast_fp16")]; tensor var_10158 = const()[name = string("op_10158"), val = tensor([0, 2, 1])]; string var_10174_pad_type_0 = const()[name = string("op_10174_pad_type_0"), val = string("valid")]; int32 var_10174_groups_0 = const()[name = string("op_10174_groups_0"), val = int32(1)]; tensor var_10174_strides_0 = const()[name = string("op_10174_strides_0"), val = tensor([1])]; tensor var_10174_pad_0 = const()[name = string("op_10174_pad_0"), val = tensor([0, 0])]; tensor var_10174_dilations_0 = const()[name = string("op_10174_dilations_0"), val = tensor([1])]; tensor squeeze_16_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1061724032))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1064869824))))[name = string("squeeze_16_cast_fp16_to_fp32_to_fp16_palettized")]; tensor var_10159_cast_fp16 = transpose(perm = var_10158, x = attn_output_165_cast_fp16)[name = string("transpose_102")]; tensor var_10174_cast_fp16 = conv(dilations = var_10174_dilations_0, groups = var_10174_groups_0, pad = var_10174_pad_0, pad_type = var_10174_pad_type_0, strides = var_10174_strides_0, weight = squeeze_16_cast_fp16_to_fp32_to_fp16_palettized, x = var_10159_cast_fp16)[name = string("op_10174_cast_fp16")]; tensor var_10178 = const()[name = string("op_10178"), val = tensor([0, 2, 1])]; tensor attn_output_169_cast_fp16 = transpose(perm = var_10178, x = var_10174_cast_fp16)[name = string("transpose_101")]; tensor hidden_states_101_cast_fp16 = add(x = hidden_states_97_cast_fp16, y = attn_output_169_cast_fp16)[name = string("hidden_states_101_cast_fp16")]; tensor mean_135_axes_0 = const()[name = string("mean_135_axes_0"), val = tensor([-1])]; bool mean_135_keep_dims_0 = const()[name = string("mean_135_keep_dims_0"), val = bool(true)]; tensor mean_135_cast_fp16 = reduce_mean(axes = mean_135_axes_0, keep_dims = mean_135_keep_dims_0, x = hidden_states_101_cast_fp16)[name = string("mean_135_cast_fp16")]; tensor input_299_cast_fp16 = sub(x = hidden_states_101_cast_fp16, y = mean_135_cast_fp16)[name = string("input_299_cast_fp16")]; tensor var_10197_axes_0 = const()[name = string("op_10197_axes_0"), val = tensor([-1])]; tensor model_model_layers_16_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_16_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1064902656)))]; fp16 var_10185_to_fp16 = const()[name = string("op_10185_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_10197_cast_fp16 = layer_norm(axes = var_10197_axes_0, epsilon = var_10185_to_fp16, gamma = model_model_layers_16_post_attention_layernorm_weight_to_fp16, x = input_299_cast_fp16)[name = string("op_10197_cast_fp16")]; tensor var_10211 = const()[name = string("op_10211"), val = tensor([0, 2, 1])]; tensor input_301_axes_0 = const()[name = string("input_301_axes_0"), val = tensor([2])]; tensor var_10212 = transpose(perm = var_10211, x = var_10197_cast_fp16)[name = string("transpose_100")]; tensor input_301 = expand_dims(axes = input_301_axes_0, x = var_10212)[name = string("input_301")]; string input_303_pad_type_0 = const()[name = string("input_303_pad_type_0"), val = string("valid")]; tensor input_303_strides_0 = const()[name = string("input_303_strides_0"), val = tensor([1, 1])]; tensor input_303_pad_0 = const()[name = string("input_303_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_303_dilations_0 = const()[name = string("input_303_dilations_0"), val = tensor([1, 1])]; int32 input_303_groups_0 = const()[name = string("input_303_groups_0"), val = int32(1)]; tensor input_303 = conv(dilations = input_303_dilations_0, groups = input_303_groups_0, pad = input_303_pad_0, pad_type = input_303_pad_type_0, strides = input_303_strides_0, weight = model_model_layers_16_mlp_gate_proj_weight_palettized, x = input_301)[name = string("input_303")]; string b_33_pad_type_0 = const()[name = string("b_33_pad_type_0"), val = string("valid")]; tensor b_33_strides_0 = const()[name = string("b_33_strides_0"), val = tensor([1, 1])]; tensor b_33_pad_0 = const()[name = string("b_33_pad_0"), val = tensor([0, 0, 0, 0])]; tensor b_33_dilations_0 = const()[name = string("b_33_dilations_0"), val = tensor([1, 1])]; int32 b_33_groups_0 = const()[name = string("b_33_groups_0"), val = int32(1)]; tensor b_33 = conv(dilations = b_33_dilations_0, groups = b_33_groups_0, pad = b_33_pad_0, pad_type = b_33_pad_type_0, strides = b_33_strides_0, weight = model_model_layers_16_mlp_up_proj_weight_palettized, x = input_301)[name = string("b_33")]; tensor c_33 = silu(x = input_303)[name = string("c_33")]; tensor input_305 = mul(x = c_33, y = b_33)[name = string("input_305")]; string e_33_pad_type_0 = const()[name = string("e_33_pad_type_0"), val = string("valid")]; tensor e_33_strides_0 = const()[name = string("e_33_strides_0"), val = tensor([1, 1])]; tensor e_33_pad_0 = const()[name = string("e_33_pad_0"), val = tensor([0, 0, 0, 0])]; tensor e_33_dilations_0 = const()[name = string("e_33_dilations_0"), val = tensor([1, 1])]; int32 e_33_groups_0 = const()[name = string("e_33_groups_0"), val = int32(1)]; tensor e_33 = conv(dilations = e_33_dilations_0, groups = e_33_groups_0, pad = e_33_pad_0, pad_type = e_33_pad_type_0, strides = e_33_strides_0, weight = model_model_layers_16_mlp_down_proj_weight_palettized, x = input_305)[name = string("e_33")]; tensor var_10234_axes_0 = const()[name = string("op_10234_axes_0"), val = tensor([2])]; tensor var_10234 = squeeze(axes = var_10234_axes_0, x = e_33)[name = string("op_10234")]; tensor var_10235 = const()[name = string("op_10235"), val = tensor([0, 2, 1])]; tensor var_10236 = transpose(perm = var_10235, x = var_10234)[name = string("transpose_99")]; tensor hidden_states_103_cast_fp16 = add(x = hidden_states_101_cast_fp16, y = var_10236)[name = string("hidden_states_103_cast_fp16")]; tensor mean_137_axes_0 = const()[name = string("mean_137_axes_0"), val = tensor([-1])]; bool mean_137_keep_dims_0 = const()[name = string("mean_137_keep_dims_0"), val = bool(true)]; tensor mean_137_cast_fp16 = reduce_mean(axes = mean_137_axes_0, keep_dims = mean_137_keep_dims_0, x = hidden_states_103_cast_fp16)[name = string("mean_137_cast_fp16")]; tensor input_307_cast_fp16 = sub(x = hidden_states_103_cast_fp16, y = mean_137_cast_fp16)[name = string("input_307_cast_fp16")]; tensor var_10254_axes_0 = const()[name = string("op_10254_axes_0"), val = tensor([-1])]; tensor model_model_layers_17_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_17_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1064906816)))]; fp16 var_10242_to_fp16 = const()[name = string("op_10242_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_10254_cast_fp16 = layer_norm(axes = var_10254_axes_0, epsilon = var_10242_to_fp16, gamma = model_model_layers_17_input_layernorm_weight_to_fp16, x = input_307_cast_fp16)[name = string("op_10254_cast_fp16")]; tensor var_10266 = const()[name = string("op_10266"), val = tensor([0, 2, 1])]; tensor var_10269_axes_0 = const()[name = string("op_10269_axes_0"), val = tensor([2])]; tensor var_10267 = transpose(perm = var_10266, x = var_10254_cast_fp16)[name = string("transpose_98")]; tensor var_10269 = expand_dims(axes = var_10269_axes_0, x = var_10267)[name = string("op_10269")]; string query_states_137_pad_type_0 = const()[name = string("query_states_137_pad_type_0"), val = string("valid")]; tensor query_states_137_strides_0 = const()[name = string("query_states_137_strides_0"), val = tensor([1, 1])]; tensor query_states_137_pad_0 = const()[name = string("query_states_137_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_137_dilations_0 = const()[name = string("query_states_137_dilations_0"), val = tensor([1, 1])]; int32 query_states_137_groups_0 = const()[name = string("query_states_137_groups_0"), val = int32(1)]; tensor query_states_137 = conv(dilations = query_states_137_dilations_0, groups = query_states_137_groups_0, pad = query_states_137_pad_0, pad_type = query_states_137_pad_type_0, strides = query_states_137_strides_0, weight = model_model_layers_17_self_attn_q_proj_weight_palettized, x = var_10269)[name = string("query_states_137")]; string key_states_171_pad_type_0 = const()[name = string("key_states_171_pad_type_0"), val = string("valid")]; tensor key_states_171_strides_0 = const()[name = string("key_states_171_strides_0"), val = tensor([1, 1])]; tensor key_states_171_pad_0 = const()[name = string("key_states_171_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_171_dilations_0 = const()[name = string("key_states_171_dilations_0"), val = tensor([1, 1])]; int32 key_states_171_groups_0 = const()[name = string("key_states_171_groups_0"), val = int32(1)]; tensor key_states_171 = conv(dilations = key_states_171_dilations_0, groups = key_states_171_groups_0, pad = key_states_171_pad_0, pad_type = key_states_171_pad_type_0, strides = key_states_171_strides_0, weight = model_model_layers_17_self_attn_k_proj_weight_palettized, x = var_10269)[name = string("key_states_171")]; string value_states_137_pad_type_0 = const()[name = string("value_states_137_pad_type_0"), val = string("valid")]; tensor value_states_137_strides_0 = const()[name = string("value_states_137_strides_0"), val = tensor([1, 1])]; tensor value_states_137_pad_0 = const()[name = string("value_states_137_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_137_dilations_0 = const()[name = string("value_states_137_dilations_0"), val = tensor([1, 1])]; int32 value_states_137_groups_0 = const()[name = string("value_states_137_groups_0"), val = int32(1)]; tensor value_states_137 = conv(dilations = value_states_137_dilations_0, groups = value_states_137_groups_0, pad = value_states_137_pad_0, pad_type = value_states_137_pad_type_0, strides = value_states_137_strides_0, weight = model_model_layers_17_self_attn_v_proj_weight_palettized, x = var_10269)[name = string("value_states_137")]; tensor var_10311 = const()[name = string("op_10311"), val = tensor([1, 16, 128, 64])]; tensor var_10312 = reshape(shape = var_10311, x = query_states_137)[name = string("op_10312")]; tensor var_10317 = const()[name = string("op_10317"), val = tensor([0, 1, 3, 2])]; tensor var_10322 = const()[name = string("op_10322"), val = tensor([1, 8, 128, 64])]; tensor var_10323 = reshape(shape = var_10322, x = key_states_171)[name = string("op_10323")]; tensor var_10328 = const()[name = string("op_10328"), val = tensor([0, 1, 3, 2])]; tensor var_10333 = const()[name = string("op_10333"), val = tensor([1, 8, 128, 64])]; tensor var_10334 = reshape(shape = var_10333, x = value_states_137)[name = string("op_10334")]; tensor var_10339 = const()[name = string("op_10339"), val = tensor([0, 1, 3, 2])]; tensor mean_139_axes_0 = const()[name = string("mean_139_axes_0"), val = tensor([-1])]; bool mean_139_keep_dims_0 = const()[name = string("mean_139_keep_dims_0"), val = bool(true)]; tensor x_341 = transpose(perm = var_10317, x = var_10312)[name = string("transpose_97")]; tensor mean_139 = reduce_mean(axes = mean_139_axes_0, keep_dims = mean_139_keep_dims_0, x = x_341)[name = string("mean_139")]; tensor input_311 = sub(x = x_341, y = mean_139)[name = string("input_311")]; tensor var_10356_axes_0 = const()[name = string("op_10356_axes_0"), val = tensor([-1])]; tensor model_model_layers_17_self_attn_q_norm_weight_to_fp16 = const()[name = string("model_model_layers_17_self_attn_q_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1064910976)))]; fp16 var_10344_to_fp16 = const()[name = string("op_10344_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_10356_cast_fp16 = layer_norm(axes = var_10356_axes_0, epsilon = var_10344_to_fp16, gamma = model_model_layers_17_self_attn_q_norm_weight_to_fp16, x = input_311)[name = string("op_10356_cast_fp16")]; tensor mean_141_axes_0 = const()[name = string("mean_141_axes_0"), val = tensor([-1])]; bool mean_141_keep_dims_0 = const()[name = string("mean_141_keep_dims_0"), val = bool(true)]; tensor x_343 = transpose(perm = var_10328, x = var_10323)[name = string("transpose_96")]; tensor mean_141 = reduce_mean(axes = mean_141_axes_0, keep_dims = mean_141_keep_dims_0, x = x_343)[name = string("mean_141")]; tensor input_313 = sub(x = x_343, y = mean_141)[name = string("input_313")]; tensor var_10374_axes_0 = const()[name = string("op_10374_axes_0"), val = tensor([-1])]; tensor model_model_layers_17_self_attn_k_norm_weight_to_fp16 = const()[name = string("model_model_layers_17_self_attn_k_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1064911296)))]; fp16 var_10362_to_fp16 = const()[name = string("op_10362_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_10374_cast_fp16 = layer_norm(axes = var_10374_axes_0, epsilon = var_10362_to_fp16, gamma = model_model_layers_17_self_attn_k_norm_weight_to_fp16, x = input_313)[name = string("op_10374_cast_fp16")]; tensor var_10389 = mul(x = var_10356_cast_fp16, y = cos_5)[name = string("op_10389")]; tensor x1_69_begin_0 = const()[name = string("x1_69_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_69_end_0 = const()[name = string("x1_69_end_0"), val = tensor([1, 16, 64, 64])]; tensor x1_69_end_mask_0 = const()[name = string("x1_69_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_69 = slice_by_index(begin = x1_69_begin_0, end = x1_69_end_0, end_mask = x1_69_end_mask_0, x = var_10356_cast_fp16)[name = string("x1_69")]; tensor x2_69_begin_0 = const()[name = string("x2_69_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_69_end_0 = const()[name = string("x2_69_end_0"), val = tensor([1, 16, 64, 128])]; tensor x2_69_end_mask_0 = const()[name = string("x2_69_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_69 = slice_by_index(begin = x2_69_begin_0, end = x2_69_end_0, end_mask = x2_69_end_mask_0, x = var_10356_cast_fp16)[name = string("x2_69")]; fp16 const_381_promoted = const()[name = string("const_381_promoted"), val = fp16(-0x1p+0)]; tensor var_10410 = mul(x = x2_69, y = const_381_promoted)[name = string("op_10410")]; int32 var_10412 = const()[name = string("op_10412"), val = int32(-1)]; bool var_10413_interleave_0 = const()[name = string("op_10413_interleave_0"), val = bool(false)]; tensor var_10413 = concat(axis = var_10412, interleave = var_10413_interleave_0, values = (var_10410, x1_69))[name = string("op_10413")]; tensor var_10414 = mul(x = var_10413, y = sin_5)[name = string("op_10414")]; tensor query_states_139 = add(x = var_10389, y = var_10414)[name = string("query_states_139")]; tensor var_10417 = mul(x = var_10374_cast_fp16, y = cos_5)[name = string("op_10417")]; tensor x1_71_begin_0 = const()[name = string("x1_71_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_71_end_0 = const()[name = string("x1_71_end_0"), val = tensor([1, 8, 64, 64])]; tensor x1_71_end_mask_0 = const()[name = string("x1_71_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_71 = slice_by_index(begin = x1_71_begin_0, end = x1_71_end_0, end_mask = x1_71_end_mask_0, x = var_10374_cast_fp16)[name = string("x1_71")]; tensor x2_71_begin_0 = const()[name = string("x2_71_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_71_end_0 = const()[name = string("x2_71_end_0"), val = tensor([1, 8, 64, 128])]; tensor x2_71_end_mask_0 = const()[name = string("x2_71_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_71 = slice_by_index(begin = x2_71_begin_0, end = x2_71_end_0, end_mask = x2_71_end_mask_0, x = var_10374_cast_fp16)[name = string("x2_71")]; fp16 const_384_promoted = const()[name = string("const_384_promoted"), val = fp16(-0x1p+0)]; tensor var_10438 = mul(x = x2_71, y = const_384_promoted)[name = string("op_10438")]; int32 var_10440 = const()[name = string("op_10440"), val = int32(-1)]; bool var_10441_interleave_0 = const()[name = string("op_10441_interleave_0"), val = bool(false)]; tensor var_10441 = concat(axis = var_10440, interleave = var_10441_interleave_0, values = (var_10438, x1_71))[name = string("op_10441")]; tensor var_10442 = mul(x = var_10441, y = sin_5)[name = string("op_10442")]; tensor key_states_173 = add(x = var_10417, y = var_10442)[name = string("key_states_173")]; tensor expand_dims_204 = const()[name = string("expand_dims_204"), val = tensor([17])]; tensor expand_dims_205 = const()[name = string("expand_dims_205"), val = tensor([0])]; tensor expand_dims_207 = const()[name = string("expand_dims_207"), val = tensor([0])]; tensor expand_dims_208 = const()[name = string("expand_dims_208"), val = tensor([18])]; int32 concat_308_axis_0 = const()[name = string("concat_308_axis_0"), val = int32(0)]; bool concat_308_interleave_0 = const()[name = string("concat_308_interleave_0"), val = bool(false)]; tensor concat_308 = concat(axis = concat_308_axis_0, interleave = concat_308_interleave_0, values = (expand_dims_204, expand_dims_205, current_pos, expand_dims_207))[name = string("concat_308")]; tensor concat_309_values1_0 = const()[name = string("concat_309_values1_0"), val = tensor([0])]; tensor concat_309_values3_0 = const()[name = string("concat_309_values3_0"), val = tensor([0])]; int32 concat_309_axis_0 = const()[name = string("concat_309_axis_0"), val = int32(0)]; bool concat_309_interleave_0 = const()[name = string("concat_309_interleave_0"), val = bool(false)]; tensor concat_309 = concat(axis = concat_309_axis_0, interleave = concat_309_interleave_0, values = (expand_dims_208, concat_309_values1_0, var_1760, concat_309_values3_0))[name = string("concat_309")]; tensor model_model_kv_cache_0_internal_tensor_assign_35_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_35_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_35_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_35_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_35_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_35_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_35_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_35_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_35_cast_fp16 = slice_update(begin = concat_308, begin_mask = model_model_kv_cache_0_internal_tensor_assign_35_begin_mask_0, end = concat_309, end_mask = model_model_kv_cache_0_internal_tensor_assign_35_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_35_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_35_stride_0, update = key_states_173, x = coreml_update_state_89)[name = string("model_model_kv_cache_0_internal_tensor_assign_35_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_35_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_90_write_state")]; tensor coreml_update_state_90 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_90")]; tensor expand_dims_210 = const()[name = string("expand_dims_210"), val = tensor([45])]; tensor expand_dims_211 = const()[name = string("expand_dims_211"), val = tensor([0])]; tensor expand_dims_213 = const()[name = string("expand_dims_213"), val = tensor([0])]; tensor expand_dims_214 = const()[name = string("expand_dims_214"), val = tensor([46])]; int32 concat_312_axis_0 = const()[name = string("concat_312_axis_0"), val = int32(0)]; bool concat_312_interleave_0 = const()[name = string("concat_312_interleave_0"), val = bool(false)]; tensor concat_312 = concat(axis = concat_312_axis_0, interleave = concat_312_interleave_0, values = (expand_dims_210, expand_dims_211, current_pos, expand_dims_213))[name = string("concat_312")]; tensor concat_313_values1_0 = const()[name = string("concat_313_values1_0"), val = tensor([0])]; tensor concat_313_values3_0 = const()[name = string("concat_313_values3_0"), val = tensor([0])]; int32 concat_313_axis_0 = const()[name = string("concat_313_axis_0"), val = int32(0)]; bool concat_313_interleave_0 = const()[name = string("concat_313_interleave_0"), val = bool(false)]; tensor concat_313 = concat(axis = concat_313_axis_0, interleave = concat_313_interleave_0, values = (expand_dims_214, concat_313_values1_0, var_1760, concat_313_values3_0))[name = string("concat_313")]; tensor model_model_kv_cache_0_internal_tensor_assign_36_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_36_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_36_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_36_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_36_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_36_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_36_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_36_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_139 = transpose(perm = var_10339, x = var_10334)[name = string("transpose_95")]; tensor model_model_kv_cache_0_internal_tensor_assign_36_cast_fp16 = slice_update(begin = concat_312, begin_mask = model_model_kv_cache_0_internal_tensor_assign_36_begin_mask_0, end = concat_313, end_mask = model_model_kv_cache_0_internal_tensor_assign_36_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_36_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_36_stride_0, update = value_states_139, x = coreml_update_state_90)[name = string("model_model_kv_cache_0_internal_tensor_assign_36_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_36_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_91_write_state")]; tensor coreml_update_state_91 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_91")]; tensor var_10513_begin_0 = const()[name = string("op_10513_begin_0"), val = tensor([17, 0, 0, 0])]; tensor var_10513_end_0 = const()[name = string("op_10513_end_0"), val = tensor([18, 8, 1024, 128])]; tensor var_10513_end_mask_0 = const()[name = string("op_10513_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_10513_cast_fp16 = slice_by_index(begin = var_10513_begin_0, end = var_10513_end_0, end_mask = var_10513_end_mask_0, x = coreml_update_state_91)[name = string("op_10513_cast_fp16")]; tensor K_layer_cache_35_axes_0 = const()[name = string("K_layer_cache_35_axes_0"), val = tensor([0])]; tensor K_layer_cache_35_cast_fp16 = squeeze(axes = K_layer_cache_35_axes_0, x = var_10513_cast_fp16)[name = string("K_layer_cache_35_cast_fp16")]; tensor var_10520_begin_0 = const()[name = string("op_10520_begin_0"), val = tensor([45, 0, 0, 0])]; tensor var_10520_end_0 = const()[name = string("op_10520_end_0"), val = tensor([46, 8, 1024, 128])]; tensor var_10520_end_mask_0 = const()[name = string("op_10520_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_10520_cast_fp16 = slice_by_index(begin = var_10520_begin_0, end = var_10520_end_0, end_mask = var_10520_end_mask_0, x = coreml_update_state_91)[name = string("op_10520_cast_fp16")]; tensor V_layer_cache_35_axes_0 = const()[name = string("V_layer_cache_35_axes_0"), val = tensor([0])]; tensor V_layer_cache_35_cast_fp16 = squeeze(axes = V_layer_cache_35_axes_0, x = var_10520_cast_fp16)[name = string("V_layer_cache_35_cast_fp16")]; tensor x_347_axes_0 = const()[name = string("x_347_axes_0"), val = tensor([1])]; tensor x_347_cast_fp16 = expand_dims(axes = x_347_axes_0, x = K_layer_cache_35_cast_fp16)[name = string("x_347_cast_fp16")]; tensor var_10549 = const()[name = string("op_10549"), val = tensor([1, 2, 1, 1])]; tensor x_349_cast_fp16 = tile(reps = var_10549, x = x_347_cast_fp16)[name = string("x_349_cast_fp16")]; tensor var_10561 = const()[name = string("op_10561"), val = tensor([1, -1, 1024, 128])]; tensor key_states_177_cast_fp16 = reshape(shape = var_10561, x = x_349_cast_fp16)[name = string("key_states_177_cast_fp16")]; tensor x_353_axes_0 = const()[name = string("x_353_axes_0"), val = tensor([1])]; tensor x_353_cast_fp16 = expand_dims(axes = x_353_axes_0, x = V_layer_cache_35_cast_fp16)[name = string("x_353_cast_fp16")]; tensor var_10569 = const()[name = string("op_10569"), val = tensor([1, 2, 1, 1])]; tensor x_355_cast_fp16 = tile(reps = var_10569, x = x_353_cast_fp16)[name = string("x_355_cast_fp16")]; bool var_10596_transpose_x_0 = const()[name = string("op_10596_transpose_x_0"), val = bool(false)]; bool var_10596_transpose_y_0 = const()[name = string("op_10596_transpose_y_0"), val = bool(true)]; tensor var_10596 = matmul(transpose_x = var_10596_transpose_x_0, transpose_y = var_10596_transpose_y_0, x = query_states_139, y = key_states_177_cast_fp16)[name = string("op_10596")]; fp16 var_10597_to_fp16 = const()[name = string("op_10597_to_fp16"), val = fp16(0x1.6ap-4)]; tensor attn_weights_69_cast_fp16 = mul(x = var_10596, y = var_10597_to_fp16)[name = string("attn_weights_69_cast_fp16")]; tensor attn_weights_71_cast_fp16 = add(x = attn_weights_69_cast_fp16, y = causal_mask)[name = string("attn_weights_71_cast_fp16")]; int32 var_10632 = const()[name = string("op_10632"), val = int32(-1)]; tensor var_10634_cast_fp16 = softmax(axis = var_10632, x = attn_weights_71_cast_fp16)[name = string("op_10634_cast_fp16")]; tensor concat_318 = const()[name = string("concat_318"), val = tensor([16, 64, 1024])]; tensor reshape_51_cast_fp16 = reshape(shape = concat_318, x = var_10634_cast_fp16)[name = string("reshape_51_cast_fp16")]; tensor concat_319 = const()[name = string("concat_319"), val = tensor([16, 1024, 128])]; tensor reshape_52_cast_fp16 = reshape(shape = concat_319, x = x_355_cast_fp16)[name = string("reshape_52_cast_fp16")]; bool matmul_17_transpose_x_0 = const()[name = string("matmul_17_transpose_x_0"), val = bool(false)]; bool matmul_17_transpose_y_0 = const()[name = string("matmul_17_transpose_y_0"), val = bool(false)]; tensor matmul_17_cast_fp16 = matmul(transpose_x = matmul_17_transpose_x_0, transpose_y = matmul_17_transpose_y_0, x = reshape_51_cast_fp16, y = reshape_52_cast_fp16)[name = string("matmul_17_cast_fp16")]; tensor concat_323 = const()[name = string("concat_323"), val = tensor([1, 16, 64, 128])]; tensor reshape_53_cast_fp16 = reshape(shape = concat_323, x = matmul_17_cast_fp16)[name = string("reshape_53_cast_fp16")]; tensor var_10646_perm_0 = const()[name = string("op_10646_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_10665 = const()[name = string("op_10665"), val = tensor([1, 64, 2048])]; tensor var_10646_cast_fp16 = transpose(perm = var_10646_perm_0, x = reshape_53_cast_fp16)[name = string("transpose_94")]; tensor attn_output_175_cast_fp16 = reshape(shape = var_10665, x = var_10646_cast_fp16)[name = string("attn_output_175_cast_fp16")]; tensor var_10670 = const()[name = string("op_10670"), val = tensor([0, 2, 1])]; string var_10686_pad_type_0 = const()[name = string("op_10686_pad_type_0"), val = string("valid")]; int32 var_10686_groups_0 = const()[name = string("op_10686_groups_0"), val = int32(1)]; tensor var_10686_strides_0 = const()[name = string("op_10686_strides_0"), val = tensor([1])]; tensor var_10686_pad_0 = const()[name = string("op_10686_pad_0"), val = tensor([0, 0])]; tensor var_10686_dilations_0 = const()[name = string("op_10686_dilations_0"), val = tensor([1])]; tensor squeeze_17_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1064911616))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1068057408))))[name = string("squeeze_17_cast_fp16_to_fp32_to_fp16_palettized")]; tensor var_10671_cast_fp16 = transpose(perm = var_10670, x = attn_output_175_cast_fp16)[name = string("transpose_93")]; tensor var_10686_cast_fp16 = conv(dilations = var_10686_dilations_0, groups = var_10686_groups_0, pad = var_10686_pad_0, pad_type = var_10686_pad_type_0, strides = var_10686_strides_0, weight = squeeze_17_cast_fp16_to_fp32_to_fp16_palettized, x = var_10671_cast_fp16)[name = string("op_10686_cast_fp16")]; tensor var_10690 = const()[name = string("op_10690"), val = tensor([0, 2, 1])]; tensor attn_output_179_cast_fp16 = transpose(perm = var_10690, x = var_10686_cast_fp16)[name = string("transpose_92")]; tensor hidden_states_107_cast_fp16 = add(x = hidden_states_103_cast_fp16, y = attn_output_179_cast_fp16)[name = string("hidden_states_107_cast_fp16")]; tensor mean_143_axes_0 = const()[name = string("mean_143_axes_0"), val = tensor([-1])]; bool mean_143_keep_dims_0 = const()[name = string("mean_143_keep_dims_0"), val = bool(true)]; tensor mean_143_cast_fp16 = reduce_mean(axes = mean_143_axes_0, keep_dims = mean_143_keep_dims_0, x = hidden_states_107_cast_fp16)[name = string("mean_143_cast_fp16")]; tensor input_317_cast_fp16 = sub(x = hidden_states_107_cast_fp16, y = mean_143_cast_fp16)[name = string("input_317_cast_fp16")]; tensor var_10709_axes_0 = const()[name = string("op_10709_axes_0"), val = tensor([-1])]; tensor model_model_layers_17_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_17_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1068090240)))]; fp16 var_10697_to_fp16 = const()[name = string("op_10697_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_10709_cast_fp16 = layer_norm(axes = var_10709_axes_0, epsilon = var_10697_to_fp16, gamma = model_model_layers_17_post_attention_layernorm_weight_to_fp16, x = input_317_cast_fp16)[name = string("op_10709_cast_fp16")]; tensor var_10723 = const()[name = string("op_10723"), val = tensor([0, 2, 1])]; tensor input_319_axes_0 = const()[name = string("input_319_axes_0"), val = tensor([2])]; tensor var_10724 = transpose(perm = var_10723, x = var_10709_cast_fp16)[name = string("transpose_91")]; tensor input_319 = expand_dims(axes = input_319_axes_0, x = var_10724)[name = string("input_319")]; string input_321_pad_type_0 = const()[name = string("input_321_pad_type_0"), val = string("valid")]; tensor input_321_strides_0 = const()[name = string("input_321_strides_0"), val = tensor([1, 1])]; tensor input_321_pad_0 = const()[name = string("input_321_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_321_dilations_0 = const()[name = string("input_321_dilations_0"), val = tensor([1, 1])]; int32 input_321_groups_0 = const()[name = string("input_321_groups_0"), val = int32(1)]; tensor input_321 = conv(dilations = input_321_dilations_0, groups = input_321_groups_0, pad = input_321_pad_0, pad_type = input_321_pad_type_0, strides = input_321_strides_0, weight = model_model_layers_17_mlp_gate_proj_weight_palettized, x = input_319)[name = string("input_321")]; string b_35_pad_type_0 = const()[name = string("b_35_pad_type_0"), val = string("valid")]; tensor b_35_strides_0 = const()[name = string("b_35_strides_0"), val = tensor([1, 1])]; tensor b_35_pad_0 = const()[name = string("b_35_pad_0"), val = tensor([0, 0, 0, 0])]; tensor b_35_dilations_0 = const()[name = string("b_35_dilations_0"), val = tensor([1, 1])]; int32 b_35_groups_0 = const()[name = string("b_35_groups_0"), val = int32(1)]; tensor b_35 = conv(dilations = b_35_dilations_0, groups = b_35_groups_0, pad = b_35_pad_0, pad_type = b_35_pad_type_0, strides = b_35_strides_0, weight = model_model_layers_17_mlp_up_proj_weight_palettized, x = input_319)[name = string("b_35")]; tensor c_35 = silu(x = input_321)[name = string("c_35")]; tensor input_323 = mul(x = c_35, y = b_35)[name = string("input_323")]; string e_35_pad_type_0 = const()[name = string("e_35_pad_type_0"), val = string("valid")]; tensor e_35_strides_0 = const()[name = string("e_35_strides_0"), val = tensor([1, 1])]; tensor e_35_pad_0 = const()[name = string("e_35_pad_0"), val = tensor([0, 0, 0, 0])]; tensor e_35_dilations_0 = const()[name = string("e_35_dilations_0"), val = tensor([1, 1])]; int32 e_35_groups_0 = const()[name = string("e_35_groups_0"), val = int32(1)]; tensor e_35 = conv(dilations = e_35_dilations_0, groups = e_35_groups_0, pad = e_35_pad_0, pad_type = e_35_pad_type_0, strides = e_35_strides_0, weight = model_model_layers_17_mlp_down_proj_weight_palettized, x = input_323)[name = string("e_35")]; tensor var_10746_axes_0 = const()[name = string("op_10746_axes_0"), val = tensor([2])]; tensor var_10746 = squeeze(axes = var_10746_axes_0, x = e_35)[name = string("op_10746")]; tensor var_10747 = const()[name = string("op_10747"), val = tensor([0, 2, 1])]; tensor var_10748 = transpose(perm = var_10747, x = var_10746)[name = string("transpose_90")]; tensor hidden_states_109_cast_fp16 = add(x = hidden_states_107_cast_fp16, y = var_10748)[name = string("hidden_states_109_cast_fp16")]; tensor mean_145_axes_0 = const()[name = string("mean_145_axes_0"), val = tensor([-1])]; bool mean_145_keep_dims_0 = const()[name = string("mean_145_keep_dims_0"), val = bool(true)]; tensor mean_145_cast_fp16 = reduce_mean(axes = mean_145_axes_0, keep_dims = mean_145_keep_dims_0, x = hidden_states_109_cast_fp16)[name = string("mean_145_cast_fp16")]; tensor input_325_cast_fp16 = sub(x = hidden_states_109_cast_fp16, y = mean_145_cast_fp16)[name = string("input_325_cast_fp16")]; tensor var_10766_axes_0 = const()[name = string("op_10766_axes_0"), val = tensor([-1])]; tensor model_model_layers_18_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_18_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1068094400)))]; fp16 var_10754_to_fp16 = const()[name = string("op_10754_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_10766_cast_fp16 = layer_norm(axes = var_10766_axes_0, epsilon = var_10754_to_fp16, gamma = model_model_layers_18_input_layernorm_weight_to_fp16, x = input_325_cast_fp16)[name = string("op_10766_cast_fp16")]; tensor var_10778 = const()[name = string("op_10778"), val = tensor([0, 2, 1])]; tensor var_10781_axes_0 = const()[name = string("op_10781_axes_0"), val = tensor([2])]; tensor var_10779 = transpose(perm = var_10778, x = var_10766_cast_fp16)[name = string("transpose_89")]; tensor var_10781 = expand_dims(axes = var_10781_axes_0, x = var_10779)[name = string("op_10781")]; string query_states_145_pad_type_0 = const()[name = string("query_states_145_pad_type_0"), val = string("valid")]; tensor query_states_145_strides_0 = const()[name = string("query_states_145_strides_0"), val = tensor([1, 1])]; tensor query_states_145_pad_0 = const()[name = string("query_states_145_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_145_dilations_0 = const()[name = string("query_states_145_dilations_0"), val = tensor([1, 1])]; int32 query_states_145_groups_0 = const()[name = string("query_states_145_groups_0"), val = int32(1)]; tensor query_states_145 = conv(dilations = query_states_145_dilations_0, groups = query_states_145_groups_0, pad = query_states_145_pad_0, pad_type = query_states_145_pad_type_0, strides = query_states_145_strides_0, weight = model_model_layers_18_self_attn_q_proj_weight_palettized, x = var_10781)[name = string("query_states_145")]; string key_states_181_pad_type_0 = const()[name = string("key_states_181_pad_type_0"), val = string("valid")]; tensor key_states_181_strides_0 = const()[name = string("key_states_181_strides_0"), val = tensor([1, 1])]; tensor key_states_181_pad_0 = const()[name = string("key_states_181_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_181_dilations_0 = const()[name = string("key_states_181_dilations_0"), val = tensor([1, 1])]; int32 key_states_181_groups_0 = const()[name = string("key_states_181_groups_0"), val = int32(1)]; tensor key_states_181 = conv(dilations = key_states_181_dilations_0, groups = key_states_181_groups_0, pad = key_states_181_pad_0, pad_type = key_states_181_pad_type_0, strides = key_states_181_strides_0, weight = model_model_layers_18_self_attn_k_proj_weight_palettized, x = var_10781)[name = string("key_states_181")]; string value_states_145_pad_type_0 = const()[name = string("value_states_145_pad_type_0"), val = string("valid")]; tensor value_states_145_strides_0 = const()[name = string("value_states_145_strides_0"), val = tensor([1, 1])]; tensor value_states_145_pad_0 = const()[name = string("value_states_145_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_145_dilations_0 = const()[name = string("value_states_145_dilations_0"), val = tensor([1, 1])]; int32 value_states_145_groups_0 = const()[name = string("value_states_145_groups_0"), val = int32(1)]; tensor value_states_145 = conv(dilations = value_states_145_dilations_0, groups = value_states_145_groups_0, pad = value_states_145_pad_0, pad_type = value_states_145_pad_type_0, strides = value_states_145_strides_0, weight = model_model_layers_18_self_attn_v_proj_weight_palettized, x = var_10781)[name = string("value_states_145")]; tensor var_10823 = const()[name = string("op_10823"), val = tensor([1, 16, 128, 64])]; tensor var_10824 = reshape(shape = var_10823, x = query_states_145)[name = string("op_10824")]; tensor var_10829 = const()[name = string("op_10829"), val = tensor([0, 1, 3, 2])]; tensor var_10834 = const()[name = string("op_10834"), val = tensor([1, 8, 128, 64])]; tensor var_10835 = reshape(shape = var_10834, x = key_states_181)[name = string("op_10835")]; tensor var_10840 = const()[name = string("op_10840"), val = tensor([0, 1, 3, 2])]; tensor var_10845 = const()[name = string("op_10845"), val = tensor([1, 8, 128, 64])]; tensor var_10846 = reshape(shape = var_10845, x = value_states_145)[name = string("op_10846")]; tensor var_10851 = const()[name = string("op_10851"), val = tensor([0, 1, 3, 2])]; tensor mean_147_axes_0 = const()[name = string("mean_147_axes_0"), val = tensor([-1])]; bool mean_147_keep_dims_0 = const()[name = string("mean_147_keep_dims_0"), val = bool(true)]; tensor x_361 = transpose(perm = var_10829, x = var_10824)[name = string("transpose_88")]; tensor mean_147 = reduce_mean(axes = mean_147_axes_0, keep_dims = mean_147_keep_dims_0, x = x_361)[name = string("mean_147")]; tensor input_329 = sub(x = x_361, y = mean_147)[name = string("input_329")]; tensor var_10868_axes_0 = const()[name = string("op_10868_axes_0"), val = tensor([-1])]; tensor model_model_layers_18_self_attn_q_norm_weight_to_fp16 = const()[name = string("model_model_layers_18_self_attn_q_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1068098560)))]; fp16 var_10856_to_fp16 = const()[name = string("op_10856_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_10868_cast_fp16 = layer_norm(axes = var_10868_axes_0, epsilon = var_10856_to_fp16, gamma = model_model_layers_18_self_attn_q_norm_weight_to_fp16, x = input_329)[name = string("op_10868_cast_fp16")]; tensor mean_149_axes_0 = const()[name = string("mean_149_axes_0"), val = tensor([-1])]; bool mean_149_keep_dims_0 = const()[name = string("mean_149_keep_dims_0"), val = bool(true)]; tensor x_363 = transpose(perm = var_10840, x = var_10835)[name = string("transpose_87")]; tensor mean_149 = reduce_mean(axes = mean_149_axes_0, keep_dims = mean_149_keep_dims_0, x = x_363)[name = string("mean_149")]; tensor input_331 = sub(x = x_363, y = mean_149)[name = string("input_331")]; tensor var_10886_axes_0 = const()[name = string("op_10886_axes_0"), val = tensor([-1])]; tensor model_model_layers_18_self_attn_k_norm_weight_to_fp16 = const()[name = string("model_model_layers_18_self_attn_k_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1068098880)))]; fp16 var_10874_to_fp16 = const()[name = string("op_10874_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_10886_cast_fp16 = layer_norm(axes = var_10886_axes_0, epsilon = var_10874_to_fp16, gamma = model_model_layers_18_self_attn_k_norm_weight_to_fp16, x = input_331)[name = string("op_10886_cast_fp16")]; tensor var_10901 = mul(x = var_10868_cast_fp16, y = cos_5)[name = string("op_10901")]; tensor x1_73_begin_0 = const()[name = string("x1_73_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_73_end_0 = const()[name = string("x1_73_end_0"), val = tensor([1, 16, 64, 64])]; tensor x1_73_end_mask_0 = const()[name = string("x1_73_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_73 = slice_by_index(begin = x1_73_begin_0, end = x1_73_end_0, end_mask = x1_73_end_mask_0, x = var_10868_cast_fp16)[name = string("x1_73")]; tensor x2_73_begin_0 = const()[name = string("x2_73_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_73_end_0 = const()[name = string("x2_73_end_0"), val = tensor([1, 16, 64, 128])]; tensor x2_73_end_mask_0 = const()[name = string("x2_73_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_73 = slice_by_index(begin = x2_73_begin_0, end = x2_73_end_0, end_mask = x2_73_end_mask_0, x = var_10868_cast_fp16)[name = string("x2_73")]; fp16 const_403_promoted = const()[name = string("const_403_promoted"), val = fp16(-0x1p+0)]; tensor var_10922 = mul(x = x2_73, y = const_403_promoted)[name = string("op_10922")]; int32 var_10924 = const()[name = string("op_10924"), val = int32(-1)]; bool var_10925_interleave_0 = const()[name = string("op_10925_interleave_0"), val = bool(false)]; tensor var_10925 = concat(axis = var_10924, interleave = var_10925_interleave_0, values = (var_10922, x1_73))[name = string("op_10925")]; tensor var_10926 = mul(x = var_10925, y = sin_5)[name = string("op_10926")]; tensor query_states_147 = add(x = var_10901, y = var_10926)[name = string("query_states_147")]; tensor var_10929 = mul(x = var_10886_cast_fp16, y = cos_5)[name = string("op_10929")]; tensor x1_75_begin_0 = const()[name = string("x1_75_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_75_end_0 = const()[name = string("x1_75_end_0"), val = tensor([1, 8, 64, 64])]; tensor x1_75_end_mask_0 = const()[name = string("x1_75_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_75 = slice_by_index(begin = x1_75_begin_0, end = x1_75_end_0, end_mask = x1_75_end_mask_0, x = var_10886_cast_fp16)[name = string("x1_75")]; tensor x2_75_begin_0 = const()[name = string("x2_75_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_75_end_0 = const()[name = string("x2_75_end_0"), val = tensor([1, 8, 64, 128])]; tensor x2_75_end_mask_0 = const()[name = string("x2_75_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_75 = slice_by_index(begin = x2_75_begin_0, end = x2_75_end_0, end_mask = x2_75_end_mask_0, x = var_10886_cast_fp16)[name = string("x2_75")]; fp16 const_406_promoted = const()[name = string("const_406_promoted"), val = fp16(-0x1p+0)]; tensor var_10950 = mul(x = x2_75, y = const_406_promoted)[name = string("op_10950")]; int32 var_10952 = const()[name = string("op_10952"), val = int32(-1)]; bool var_10953_interleave_0 = const()[name = string("op_10953_interleave_0"), val = bool(false)]; tensor var_10953 = concat(axis = var_10952, interleave = var_10953_interleave_0, values = (var_10950, x1_75))[name = string("op_10953")]; tensor var_10954 = mul(x = var_10953, y = sin_5)[name = string("op_10954")]; tensor key_states_183 = add(x = var_10929, y = var_10954)[name = string("key_states_183")]; tensor expand_dims_216 = const()[name = string("expand_dims_216"), val = tensor([18])]; tensor expand_dims_217 = const()[name = string("expand_dims_217"), val = tensor([0])]; tensor expand_dims_219 = const()[name = string("expand_dims_219"), val = tensor([0])]; tensor expand_dims_220 = const()[name = string("expand_dims_220"), val = tensor([19])]; int32 concat_326_axis_0 = const()[name = string("concat_326_axis_0"), val = int32(0)]; bool concat_326_interleave_0 = const()[name = string("concat_326_interleave_0"), val = bool(false)]; tensor concat_326 = concat(axis = concat_326_axis_0, interleave = concat_326_interleave_0, values = (expand_dims_216, expand_dims_217, current_pos, expand_dims_219))[name = string("concat_326")]; tensor concat_327_values1_0 = const()[name = string("concat_327_values1_0"), val = tensor([0])]; tensor concat_327_values3_0 = const()[name = string("concat_327_values3_0"), val = tensor([0])]; int32 concat_327_axis_0 = const()[name = string("concat_327_axis_0"), val = int32(0)]; bool concat_327_interleave_0 = const()[name = string("concat_327_interleave_0"), val = bool(false)]; tensor concat_327 = concat(axis = concat_327_axis_0, interleave = concat_327_interleave_0, values = (expand_dims_220, concat_327_values1_0, var_1760, concat_327_values3_0))[name = string("concat_327")]; tensor model_model_kv_cache_0_internal_tensor_assign_37_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_37_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_37_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_37_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_37_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_37_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_37_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_37_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_37_cast_fp16 = slice_update(begin = concat_326, begin_mask = model_model_kv_cache_0_internal_tensor_assign_37_begin_mask_0, end = concat_327, end_mask = model_model_kv_cache_0_internal_tensor_assign_37_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_37_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_37_stride_0, update = key_states_183, x = coreml_update_state_91)[name = string("model_model_kv_cache_0_internal_tensor_assign_37_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_37_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_92_write_state")]; tensor coreml_update_state_92 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_92")]; tensor expand_dims_222 = const()[name = string("expand_dims_222"), val = tensor([46])]; tensor expand_dims_223 = const()[name = string("expand_dims_223"), val = tensor([0])]; tensor expand_dims_225 = const()[name = string("expand_dims_225"), val = tensor([0])]; tensor expand_dims_226 = const()[name = string("expand_dims_226"), val = tensor([47])]; int32 concat_330_axis_0 = const()[name = string("concat_330_axis_0"), val = int32(0)]; bool concat_330_interleave_0 = const()[name = string("concat_330_interleave_0"), val = bool(false)]; tensor concat_330 = concat(axis = concat_330_axis_0, interleave = concat_330_interleave_0, values = (expand_dims_222, expand_dims_223, current_pos, expand_dims_225))[name = string("concat_330")]; tensor concat_331_values1_0 = const()[name = string("concat_331_values1_0"), val = tensor([0])]; tensor concat_331_values3_0 = const()[name = string("concat_331_values3_0"), val = tensor([0])]; int32 concat_331_axis_0 = const()[name = string("concat_331_axis_0"), val = int32(0)]; bool concat_331_interleave_0 = const()[name = string("concat_331_interleave_0"), val = bool(false)]; tensor concat_331 = concat(axis = concat_331_axis_0, interleave = concat_331_interleave_0, values = (expand_dims_226, concat_331_values1_0, var_1760, concat_331_values3_0))[name = string("concat_331")]; tensor model_model_kv_cache_0_internal_tensor_assign_38_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_38_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_38_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_38_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_38_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_38_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_38_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_38_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_147 = transpose(perm = var_10851, x = var_10846)[name = string("transpose_86")]; tensor model_model_kv_cache_0_internal_tensor_assign_38_cast_fp16 = slice_update(begin = concat_330, begin_mask = model_model_kv_cache_0_internal_tensor_assign_38_begin_mask_0, end = concat_331, end_mask = model_model_kv_cache_0_internal_tensor_assign_38_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_38_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_38_stride_0, update = value_states_147, x = coreml_update_state_92)[name = string("model_model_kv_cache_0_internal_tensor_assign_38_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_38_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_93_write_state")]; tensor coreml_update_state_93 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_93")]; tensor var_11025_begin_0 = const()[name = string("op_11025_begin_0"), val = tensor([18, 0, 0, 0])]; tensor var_11025_end_0 = const()[name = string("op_11025_end_0"), val = tensor([19, 8, 1024, 128])]; tensor var_11025_end_mask_0 = const()[name = string("op_11025_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_11025_cast_fp16 = slice_by_index(begin = var_11025_begin_0, end = var_11025_end_0, end_mask = var_11025_end_mask_0, x = coreml_update_state_93)[name = string("op_11025_cast_fp16")]; tensor K_layer_cache_37_axes_0 = const()[name = string("K_layer_cache_37_axes_0"), val = tensor([0])]; tensor K_layer_cache_37_cast_fp16 = squeeze(axes = K_layer_cache_37_axes_0, x = var_11025_cast_fp16)[name = string("K_layer_cache_37_cast_fp16")]; tensor var_11032_begin_0 = const()[name = string("op_11032_begin_0"), val = tensor([46, 0, 0, 0])]; tensor var_11032_end_0 = const()[name = string("op_11032_end_0"), val = tensor([47, 8, 1024, 128])]; tensor var_11032_end_mask_0 = const()[name = string("op_11032_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_11032_cast_fp16 = slice_by_index(begin = var_11032_begin_0, end = var_11032_end_0, end_mask = var_11032_end_mask_0, x = coreml_update_state_93)[name = string("op_11032_cast_fp16")]; tensor V_layer_cache_37_axes_0 = const()[name = string("V_layer_cache_37_axes_0"), val = tensor([0])]; tensor V_layer_cache_37_cast_fp16 = squeeze(axes = V_layer_cache_37_axes_0, x = var_11032_cast_fp16)[name = string("V_layer_cache_37_cast_fp16")]; tensor x_367_axes_0 = const()[name = string("x_367_axes_0"), val = tensor([1])]; tensor x_367_cast_fp16 = expand_dims(axes = x_367_axes_0, x = K_layer_cache_37_cast_fp16)[name = string("x_367_cast_fp16")]; tensor var_11061 = const()[name = string("op_11061"), val = tensor([1, 2, 1, 1])]; tensor x_369_cast_fp16 = tile(reps = var_11061, x = x_367_cast_fp16)[name = string("x_369_cast_fp16")]; tensor var_11073 = const()[name = string("op_11073"), val = tensor([1, -1, 1024, 128])]; tensor key_states_187_cast_fp16 = reshape(shape = var_11073, x = x_369_cast_fp16)[name = string("key_states_187_cast_fp16")]; tensor x_373_axes_0 = const()[name = string("x_373_axes_0"), val = tensor([1])]; tensor x_373_cast_fp16 = expand_dims(axes = x_373_axes_0, x = V_layer_cache_37_cast_fp16)[name = string("x_373_cast_fp16")]; tensor var_11081 = const()[name = string("op_11081"), val = tensor([1, 2, 1, 1])]; tensor x_375_cast_fp16 = tile(reps = var_11081, x = x_373_cast_fp16)[name = string("x_375_cast_fp16")]; bool var_11108_transpose_x_0 = const()[name = string("op_11108_transpose_x_0"), val = bool(false)]; bool var_11108_transpose_y_0 = const()[name = string("op_11108_transpose_y_0"), val = bool(true)]; tensor var_11108 = matmul(transpose_x = var_11108_transpose_x_0, transpose_y = var_11108_transpose_y_0, x = query_states_147, y = key_states_187_cast_fp16)[name = string("op_11108")]; fp16 var_11109_to_fp16 = const()[name = string("op_11109_to_fp16"), val = fp16(0x1.6ap-4)]; tensor attn_weights_73_cast_fp16 = mul(x = var_11108, y = var_11109_to_fp16)[name = string("attn_weights_73_cast_fp16")]; tensor attn_weights_75_cast_fp16 = add(x = attn_weights_73_cast_fp16, y = causal_mask)[name = string("attn_weights_75_cast_fp16")]; int32 var_11144 = const()[name = string("op_11144"), val = int32(-1)]; tensor var_11146_cast_fp16 = softmax(axis = var_11144, x = attn_weights_75_cast_fp16)[name = string("op_11146_cast_fp16")]; tensor concat_336 = const()[name = string("concat_336"), val = tensor([16, 64, 1024])]; tensor reshape_54_cast_fp16 = reshape(shape = concat_336, x = var_11146_cast_fp16)[name = string("reshape_54_cast_fp16")]; tensor concat_337 = const()[name = string("concat_337"), val = tensor([16, 1024, 128])]; tensor reshape_55_cast_fp16 = reshape(shape = concat_337, x = x_375_cast_fp16)[name = string("reshape_55_cast_fp16")]; bool matmul_18_transpose_x_0 = const()[name = string("matmul_18_transpose_x_0"), val = bool(false)]; bool matmul_18_transpose_y_0 = const()[name = string("matmul_18_transpose_y_0"), val = bool(false)]; tensor matmul_18_cast_fp16 = matmul(transpose_x = matmul_18_transpose_x_0, transpose_y = matmul_18_transpose_y_0, x = reshape_54_cast_fp16, y = reshape_55_cast_fp16)[name = string("matmul_18_cast_fp16")]; tensor concat_341 = const()[name = string("concat_341"), val = tensor([1, 16, 64, 128])]; tensor reshape_56_cast_fp16 = reshape(shape = concat_341, x = matmul_18_cast_fp16)[name = string("reshape_56_cast_fp16")]; tensor var_11158_perm_0 = const()[name = string("op_11158_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_11177 = const()[name = string("op_11177"), val = tensor([1, 64, 2048])]; tensor var_11158_cast_fp16 = transpose(perm = var_11158_perm_0, x = reshape_56_cast_fp16)[name = string("transpose_85")]; tensor attn_output_185_cast_fp16 = reshape(shape = var_11177, x = var_11158_cast_fp16)[name = string("attn_output_185_cast_fp16")]; tensor var_11182 = const()[name = string("op_11182"), val = tensor([0, 2, 1])]; string var_11198_pad_type_0 = const()[name = string("op_11198_pad_type_0"), val = string("valid")]; int32 var_11198_groups_0 = const()[name = string("op_11198_groups_0"), val = int32(1)]; tensor var_11198_strides_0 = const()[name = string("op_11198_strides_0"), val = tensor([1])]; tensor var_11198_pad_0 = const()[name = string("op_11198_pad_0"), val = tensor([0, 0])]; tensor var_11198_dilations_0 = const()[name = string("op_11198_dilations_0"), val = tensor([1])]; tensor squeeze_18_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1068099200))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1071244992))))[name = string("squeeze_18_cast_fp16_to_fp32_to_fp16_palettized")]; tensor var_11183_cast_fp16 = transpose(perm = var_11182, x = attn_output_185_cast_fp16)[name = string("transpose_84")]; tensor var_11198_cast_fp16 = conv(dilations = var_11198_dilations_0, groups = var_11198_groups_0, pad = var_11198_pad_0, pad_type = var_11198_pad_type_0, strides = var_11198_strides_0, weight = squeeze_18_cast_fp16_to_fp32_to_fp16_palettized, x = var_11183_cast_fp16)[name = string("op_11198_cast_fp16")]; tensor var_11202 = const()[name = string("op_11202"), val = tensor([0, 2, 1])]; tensor attn_output_189_cast_fp16 = transpose(perm = var_11202, x = var_11198_cast_fp16)[name = string("transpose_83")]; tensor hidden_states_113_cast_fp16 = add(x = hidden_states_109_cast_fp16, y = attn_output_189_cast_fp16)[name = string("hidden_states_113_cast_fp16")]; tensor mean_151_axes_0 = const()[name = string("mean_151_axes_0"), val = tensor([-1])]; bool mean_151_keep_dims_0 = const()[name = string("mean_151_keep_dims_0"), val = bool(true)]; tensor mean_151_cast_fp16 = reduce_mean(axes = mean_151_axes_0, keep_dims = mean_151_keep_dims_0, x = hidden_states_113_cast_fp16)[name = string("mean_151_cast_fp16")]; tensor input_335_cast_fp16 = sub(x = hidden_states_113_cast_fp16, y = mean_151_cast_fp16)[name = string("input_335_cast_fp16")]; tensor var_11221_axes_0 = const()[name = string("op_11221_axes_0"), val = tensor([-1])]; tensor model_model_layers_18_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_18_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1071277824)))]; fp16 var_11209_to_fp16 = const()[name = string("op_11209_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_11221_cast_fp16 = layer_norm(axes = var_11221_axes_0, epsilon = var_11209_to_fp16, gamma = model_model_layers_18_post_attention_layernorm_weight_to_fp16, x = input_335_cast_fp16)[name = string("op_11221_cast_fp16")]; tensor var_11235 = const()[name = string("op_11235"), val = tensor([0, 2, 1])]; tensor input_337_axes_0 = const()[name = string("input_337_axes_0"), val = tensor([2])]; tensor var_11236 = transpose(perm = var_11235, x = var_11221_cast_fp16)[name = string("transpose_82")]; tensor input_337 = expand_dims(axes = input_337_axes_0, x = var_11236)[name = string("input_337")]; string input_339_pad_type_0 = const()[name = string("input_339_pad_type_0"), val = string("valid")]; tensor input_339_strides_0 = const()[name = string("input_339_strides_0"), val = tensor([1, 1])]; tensor input_339_pad_0 = const()[name = string("input_339_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_339_dilations_0 = const()[name = string("input_339_dilations_0"), val = tensor([1, 1])]; int32 input_339_groups_0 = const()[name = string("input_339_groups_0"), val = int32(1)]; tensor input_339 = conv(dilations = input_339_dilations_0, groups = input_339_groups_0, pad = input_339_pad_0, pad_type = input_339_pad_type_0, strides = input_339_strides_0, weight = model_model_layers_18_mlp_gate_proj_weight_palettized, x = input_337)[name = string("input_339")]; string b_37_pad_type_0 = const()[name = string("b_37_pad_type_0"), val = string("valid")]; tensor b_37_strides_0 = const()[name = string("b_37_strides_0"), val = tensor([1, 1])]; tensor b_37_pad_0 = const()[name = string("b_37_pad_0"), val = tensor([0, 0, 0, 0])]; tensor b_37_dilations_0 = const()[name = string("b_37_dilations_0"), val = tensor([1, 1])]; int32 b_37_groups_0 = const()[name = string("b_37_groups_0"), val = int32(1)]; tensor b_37 = conv(dilations = b_37_dilations_0, groups = b_37_groups_0, pad = b_37_pad_0, pad_type = b_37_pad_type_0, strides = b_37_strides_0, weight = model_model_layers_18_mlp_up_proj_weight_palettized, x = input_337)[name = string("b_37")]; tensor c_37 = silu(x = input_339)[name = string("c_37")]; tensor input_341 = mul(x = c_37, y = b_37)[name = string("input_341")]; string e_37_pad_type_0 = const()[name = string("e_37_pad_type_0"), val = string("valid")]; tensor e_37_strides_0 = const()[name = string("e_37_strides_0"), val = tensor([1, 1])]; tensor e_37_pad_0 = const()[name = string("e_37_pad_0"), val = tensor([0, 0, 0, 0])]; tensor e_37_dilations_0 = const()[name = string("e_37_dilations_0"), val = tensor([1, 1])]; int32 e_37_groups_0 = const()[name = string("e_37_groups_0"), val = int32(1)]; tensor e_37 = conv(dilations = e_37_dilations_0, groups = e_37_groups_0, pad = e_37_pad_0, pad_type = e_37_pad_type_0, strides = e_37_strides_0, weight = model_model_layers_18_mlp_down_proj_weight_palettized, x = input_341)[name = string("e_37")]; tensor var_11258_axes_0 = const()[name = string("op_11258_axes_0"), val = tensor([2])]; tensor var_11258 = squeeze(axes = var_11258_axes_0, x = e_37)[name = string("op_11258")]; tensor var_11259 = const()[name = string("op_11259"), val = tensor([0, 2, 1])]; tensor var_11260 = transpose(perm = var_11259, x = var_11258)[name = string("transpose_81")]; tensor hidden_states_115_cast_fp16 = add(x = hidden_states_113_cast_fp16, y = var_11260)[name = string("hidden_states_115_cast_fp16")]; tensor mean_153_axes_0 = const()[name = string("mean_153_axes_0"), val = tensor([-1])]; bool mean_153_keep_dims_0 = const()[name = string("mean_153_keep_dims_0"), val = bool(true)]; tensor mean_153_cast_fp16 = reduce_mean(axes = mean_153_axes_0, keep_dims = mean_153_keep_dims_0, x = hidden_states_115_cast_fp16)[name = string("mean_153_cast_fp16")]; tensor input_343_cast_fp16 = sub(x = hidden_states_115_cast_fp16, y = mean_153_cast_fp16)[name = string("input_343_cast_fp16")]; tensor var_11278_axes_0 = const()[name = string("op_11278_axes_0"), val = tensor([-1])]; tensor model_model_layers_19_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_19_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1071281984)))]; fp16 var_11266_to_fp16 = const()[name = string("op_11266_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_11278_cast_fp16 = layer_norm(axes = var_11278_axes_0, epsilon = var_11266_to_fp16, gamma = model_model_layers_19_input_layernorm_weight_to_fp16, x = input_343_cast_fp16)[name = string("op_11278_cast_fp16")]; tensor var_11290 = const()[name = string("op_11290"), val = tensor([0, 2, 1])]; tensor var_11293_axes_0 = const()[name = string("op_11293_axes_0"), val = tensor([2])]; tensor var_11291 = transpose(perm = var_11290, x = var_11278_cast_fp16)[name = string("transpose_80")]; tensor var_11293 = expand_dims(axes = var_11293_axes_0, x = var_11291)[name = string("op_11293")]; string query_states_153_pad_type_0 = const()[name = string("query_states_153_pad_type_0"), val = string("valid")]; tensor query_states_153_strides_0 = const()[name = string("query_states_153_strides_0"), val = tensor([1, 1])]; tensor query_states_153_pad_0 = const()[name = string("query_states_153_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_153_dilations_0 = const()[name = string("query_states_153_dilations_0"), val = tensor([1, 1])]; int32 query_states_153_groups_0 = const()[name = string("query_states_153_groups_0"), val = int32(1)]; tensor query_states_153 = conv(dilations = query_states_153_dilations_0, groups = query_states_153_groups_0, pad = query_states_153_pad_0, pad_type = query_states_153_pad_type_0, strides = query_states_153_strides_0, weight = model_model_layers_19_self_attn_q_proj_weight_palettized, x = var_11293)[name = string("query_states_153")]; string key_states_191_pad_type_0 = const()[name = string("key_states_191_pad_type_0"), val = string("valid")]; tensor key_states_191_strides_0 = const()[name = string("key_states_191_strides_0"), val = tensor([1, 1])]; tensor key_states_191_pad_0 = const()[name = string("key_states_191_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_191_dilations_0 = const()[name = string("key_states_191_dilations_0"), val = tensor([1, 1])]; int32 key_states_191_groups_0 = const()[name = string("key_states_191_groups_0"), val = int32(1)]; tensor key_states_191 = conv(dilations = key_states_191_dilations_0, groups = key_states_191_groups_0, pad = key_states_191_pad_0, pad_type = key_states_191_pad_type_0, strides = key_states_191_strides_0, weight = model_model_layers_19_self_attn_k_proj_weight_palettized, x = var_11293)[name = string("key_states_191")]; string value_states_153_pad_type_0 = const()[name = string("value_states_153_pad_type_0"), val = string("valid")]; tensor value_states_153_strides_0 = const()[name = string("value_states_153_strides_0"), val = tensor([1, 1])]; tensor value_states_153_pad_0 = const()[name = string("value_states_153_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_153_dilations_0 = const()[name = string("value_states_153_dilations_0"), val = tensor([1, 1])]; int32 value_states_153_groups_0 = const()[name = string("value_states_153_groups_0"), val = int32(1)]; tensor value_states_153 = conv(dilations = value_states_153_dilations_0, groups = value_states_153_groups_0, pad = value_states_153_pad_0, pad_type = value_states_153_pad_type_0, strides = value_states_153_strides_0, weight = model_model_layers_19_self_attn_v_proj_weight_palettized, x = var_11293)[name = string("value_states_153")]; tensor var_11335 = const()[name = string("op_11335"), val = tensor([1, 16, 128, 64])]; tensor var_11336 = reshape(shape = var_11335, x = query_states_153)[name = string("op_11336")]; tensor var_11341 = const()[name = string("op_11341"), val = tensor([0, 1, 3, 2])]; tensor var_11346 = const()[name = string("op_11346"), val = tensor([1, 8, 128, 64])]; tensor var_11347 = reshape(shape = var_11346, x = key_states_191)[name = string("op_11347")]; tensor var_11352 = const()[name = string("op_11352"), val = tensor([0, 1, 3, 2])]; tensor var_11357 = const()[name = string("op_11357"), val = tensor([1, 8, 128, 64])]; tensor var_11358 = reshape(shape = var_11357, x = value_states_153)[name = string("op_11358")]; tensor var_11363 = const()[name = string("op_11363"), val = tensor([0, 1, 3, 2])]; tensor mean_155_axes_0 = const()[name = string("mean_155_axes_0"), val = tensor([-1])]; bool mean_155_keep_dims_0 = const()[name = string("mean_155_keep_dims_0"), val = bool(true)]; tensor x_381 = transpose(perm = var_11341, x = var_11336)[name = string("transpose_79")]; tensor mean_155 = reduce_mean(axes = mean_155_axes_0, keep_dims = mean_155_keep_dims_0, x = x_381)[name = string("mean_155")]; tensor input_347 = sub(x = x_381, y = mean_155)[name = string("input_347")]; tensor var_11380_axes_0 = const()[name = string("op_11380_axes_0"), val = tensor([-1])]; tensor model_model_layers_19_self_attn_q_norm_weight_to_fp16 = const()[name = string("model_model_layers_19_self_attn_q_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1071286144)))]; fp16 var_11368_to_fp16 = const()[name = string("op_11368_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_11380_cast_fp16 = layer_norm(axes = var_11380_axes_0, epsilon = var_11368_to_fp16, gamma = model_model_layers_19_self_attn_q_norm_weight_to_fp16, x = input_347)[name = string("op_11380_cast_fp16")]; tensor mean_157_axes_0 = const()[name = string("mean_157_axes_0"), val = tensor([-1])]; bool mean_157_keep_dims_0 = const()[name = string("mean_157_keep_dims_0"), val = bool(true)]; tensor x_383 = transpose(perm = var_11352, x = var_11347)[name = string("transpose_78")]; tensor mean_157 = reduce_mean(axes = mean_157_axes_0, keep_dims = mean_157_keep_dims_0, x = x_383)[name = string("mean_157")]; tensor input_349 = sub(x = x_383, y = mean_157)[name = string("input_349")]; tensor var_11398_axes_0 = const()[name = string("op_11398_axes_0"), val = tensor([-1])]; tensor model_model_layers_19_self_attn_k_norm_weight_to_fp16 = const()[name = string("model_model_layers_19_self_attn_k_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1071286464)))]; fp16 var_11386_to_fp16 = const()[name = string("op_11386_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_11398_cast_fp16 = layer_norm(axes = var_11398_axes_0, epsilon = var_11386_to_fp16, gamma = model_model_layers_19_self_attn_k_norm_weight_to_fp16, x = input_349)[name = string("op_11398_cast_fp16")]; tensor var_11413 = mul(x = var_11380_cast_fp16, y = cos_5)[name = string("op_11413")]; tensor x1_77_begin_0 = const()[name = string("x1_77_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_77_end_0 = const()[name = string("x1_77_end_0"), val = tensor([1, 16, 64, 64])]; tensor x1_77_end_mask_0 = const()[name = string("x1_77_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_77 = slice_by_index(begin = x1_77_begin_0, end = x1_77_end_0, end_mask = x1_77_end_mask_0, x = var_11380_cast_fp16)[name = string("x1_77")]; tensor x2_77_begin_0 = const()[name = string("x2_77_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_77_end_0 = const()[name = string("x2_77_end_0"), val = tensor([1, 16, 64, 128])]; tensor x2_77_end_mask_0 = const()[name = string("x2_77_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_77 = slice_by_index(begin = x2_77_begin_0, end = x2_77_end_0, end_mask = x2_77_end_mask_0, x = var_11380_cast_fp16)[name = string("x2_77")]; fp16 const_425_promoted = const()[name = string("const_425_promoted"), val = fp16(-0x1p+0)]; tensor var_11434 = mul(x = x2_77, y = const_425_promoted)[name = string("op_11434")]; int32 var_11436 = const()[name = string("op_11436"), val = int32(-1)]; bool var_11437_interleave_0 = const()[name = string("op_11437_interleave_0"), val = bool(false)]; tensor var_11437 = concat(axis = var_11436, interleave = var_11437_interleave_0, values = (var_11434, x1_77))[name = string("op_11437")]; tensor var_11438 = mul(x = var_11437, y = sin_5)[name = string("op_11438")]; tensor query_states_155 = add(x = var_11413, y = var_11438)[name = string("query_states_155")]; tensor var_11441 = mul(x = var_11398_cast_fp16, y = cos_5)[name = string("op_11441")]; tensor x1_79_begin_0 = const()[name = string("x1_79_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_79_end_0 = const()[name = string("x1_79_end_0"), val = tensor([1, 8, 64, 64])]; tensor x1_79_end_mask_0 = const()[name = string("x1_79_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_79 = slice_by_index(begin = x1_79_begin_0, end = x1_79_end_0, end_mask = x1_79_end_mask_0, x = var_11398_cast_fp16)[name = string("x1_79")]; tensor x2_79_begin_0 = const()[name = string("x2_79_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_79_end_0 = const()[name = string("x2_79_end_0"), val = tensor([1, 8, 64, 128])]; tensor x2_79_end_mask_0 = const()[name = string("x2_79_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_79 = slice_by_index(begin = x2_79_begin_0, end = x2_79_end_0, end_mask = x2_79_end_mask_0, x = var_11398_cast_fp16)[name = string("x2_79")]; fp16 const_428_promoted = const()[name = string("const_428_promoted"), val = fp16(-0x1p+0)]; tensor var_11462 = mul(x = x2_79, y = const_428_promoted)[name = string("op_11462")]; int32 var_11464 = const()[name = string("op_11464"), val = int32(-1)]; bool var_11465_interleave_0 = const()[name = string("op_11465_interleave_0"), val = bool(false)]; tensor var_11465 = concat(axis = var_11464, interleave = var_11465_interleave_0, values = (var_11462, x1_79))[name = string("op_11465")]; tensor var_11466 = mul(x = var_11465, y = sin_5)[name = string("op_11466")]; tensor key_states_193 = add(x = var_11441, y = var_11466)[name = string("key_states_193")]; tensor expand_dims_228 = const()[name = string("expand_dims_228"), val = tensor([19])]; tensor expand_dims_229 = const()[name = string("expand_dims_229"), val = tensor([0])]; tensor expand_dims_231 = const()[name = string("expand_dims_231"), val = tensor([0])]; tensor expand_dims_232 = const()[name = string("expand_dims_232"), val = tensor([20])]; int32 concat_344_axis_0 = const()[name = string("concat_344_axis_0"), val = int32(0)]; bool concat_344_interleave_0 = const()[name = string("concat_344_interleave_0"), val = bool(false)]; tensor concat_344 = concat(axis = concat_344_axis_0, interleave = concat_344_interleave_0, values = (expand_dims_228, expand_dims_229, current_pos, expand_dims_231))[name = string("concat_344")]; tensor concat_345_values1_0 = const()[name = string("concat_345_values1_0"), val = tensor([0])]; tensor concat_345_values3_0 = const()[name = string("concat_345_values3_0"), val = tensor([0])]; int32 concat_345_axis_0 = const()[name = string("concat_345_axis_0"), val = int32(0)]; bool concat_345_interleave_0 = const()[name = string("concat_345_interleave_0"), val = bool(false)]; tensor concat_345 = concat(axis = concat_345_axis_0, interleave = concat_345_interleave_0, values = (expand_dims_232, concat_345_values1_0, var_1760, concat_345_values3_0))[name = string("concat_345")]; tensor model_model_kv_cache_0_internal_tensor_assign_39_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_39_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_39_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_39_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_39_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_39_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_39_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_39_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_39_cast_fp16 = slice_update(begin = concat_344, begin_mask = model_model_kv_cache_0_internal_tensor_assign_39_begin_mask_0, end = concat_345, end_mask = model_model_kv_cache_0_internal_tensor_assign_39_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_39_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_39_stride_0, update = key_states_193, x = coreml_update_state_93)[name = string("model_model_kv_cache_0_internal_tensor_assign_39_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_39_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_94_write_state")]; tensor coreml_update_state_94 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_94")]; tensor expand_dims_234 = const()[name = string("expand_dims_234"), val = tensor([47])]; tensor expand_dims_235 = const()[name = string("expand_dims_235"), val = tensor([0])]; tensor expand_dims_237 = const()[name = string("expand_dims_237"), val = tensor([0])]; tensor expand_dims_238 = const()[name = string("expand_dims_238"), val = tensor([48])]; int32 concat_348_axis_0 = const()[name = string("concat_348_axis_0"), val = int32(0)]; bool concat_348_interleave_0 = const()[name = string("concat_348_interleave_0"), val = bool(false)]; tensor concat_348 = concat(axis = concat_348_axis_0, interleave = concat_348_interleave_0, values = (expand_dims_234, expand_dims_235, current_pos, expand_dims_237))[name = string("concat_348")]; tensor concat_349_values1_0 = const()[name = string("concat_349_values1_0"), val = tensor([0])]; tensor concat_349_values3_0 = const()[name = string("concat_349_values3_0"), val = tensor([0])]; int32 concat_349_axis_0 = const()[name = string("concat_349_axis_0"), val = int32(0)]; bool concat_349_interleave_0 = const()[name = string("concat_349_interleave_0"), val = bool(false)]; tensor concat_349 = concat(axis = concat_349_axis_0, interleave = concat_349_interleave_0, values = (expand_dims_238, concat_349_values1_0, var_1760, concat_349_values3_0))[name = string("concat_349")]; tensor model_model_kv_cache_0_internal_tensor_assign_40_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_40_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_40_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_40_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_40_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_40_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_40_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_40_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_155 = transpose(perm = var_11363, x = var_11358)[name = string("transpose_77")]; tensor model_model_kv_cache_0_internal_tensor_assign_40_cast_fp16 = slice_update(begin = concat_348, begin_mask = model_model_kv_cache_0_internal_tensor_assign_40_begin_mask_0, end = concat_349, end_mask = model_model_kv_cache_0_internal_tensor_assign_40_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_40_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_40_stride_0, update = value_states_155, x = coreml_update_state_94)[name = string("model_model_kv_cache_0_internal_tensor_assign_40_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_40_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_95_write_state")]; tensor coreml_update_state_95 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_95")]; tensor var_11537_begin_0 = const()[name = string("op_11537_begin_0"), val = tensor([19, 0, 0, 0])]; tensor var_11537_end_0 = const()[name = string("op_11537_end_0"), val = tensor([20, 8, 1024, 128])]; tensor var_11537_end_mask_0 = const()[name = string("op_11537_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_11537_cast_fp16 = slice_by_index(begin = var_11537_begin_0, end = var_11537_end_0, end_mask = var_11537_end_mask_0, x = coreml_update_state_95)[name = string("op_11537_cast_fp16")]; tensor K_layer_cache_39_axes_0 = const()[name = string("K_layer_cache_39_axes_0"), val = tensor([0])]; tensor K_layer_cache_39_cast_fp16 = squeeze(axes = K_layer_cache_39_axes_0, x = var_11537_cast_fp16)[name = string("K_layer_cache_39_cast_fp16")]; tensor var_11544_begin_0 = const()[name = string("op_11544_begin_0"), val = tensor([47, 0, 0, 0])]; tensor var_11544_end_0 = const()[name = string("op_11544_end_0"), val = tensor([48, 8, 1024, 128])]; tensor var_11544_end_mask_0 = const()[name = string("op_11544_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_11544_cast_fp16 = slice_by_index(begin = var_11544_begin_0, end = var_11544_end_0, end_mask = var_11544_end_mask_0, x = coreml_update_state_95)[name = string("op_11544_cast_fp16")]; tensor V_layer_cache_39_axes_0 = const()[name = string("V_layer_cache_39_axes_0"), val = tensor([0])]; tensor V_layer_cache_39_cast_fp16 = squeeze(axes = V_layer_cache_39_axes_0, x = var_11544_cast_fp16)[name = string("V_layer_cache_39_cast_fp16")]; tensor x_387_axes_0 = const()[name = string("x_387_axes_0"), val = tensor([1])]; tensor x_387_cast_fp16 = expand_dims(axes = x_387_axes_0, x = K_layer_cache_39_cast_fp16)[name = string("x_387_cast_fp16")]; tensor var_11573 = const()[name = string("op_11573"), val = tensor([1, 2, 1, 1])]; tensor x_389_cast_fp16 = tile(reps = var_11573, x = x_387_cast_fp16)[name = string("x_389_cast_fp16")]; tensor var_11585 = const()[name = string("op_11585"), val = tensor([1, -1, 1024, 128])]; tensor key_states_197_cast_fp16 = reshape(shape = var_11585, x = x_389_cast_fp16)[name = string("key_states_197_cast_fp16")]; tensor x_393_axes_0 = const()[name = string("x_393_axes_0"), val = tensor([1])]; tensor x_393_cast_fp16 = expand_dims(axes = x_393_axes_0, x = V_layer_cache_39_cast_fp16)[name = string("x_393_cast_fp16")]; tensor var_11593 = const()[name = string("op_11593"), val = tensor([1, 2, 1, 1])]; tensor x_395_cast_fp16 = tile(reps = var_11593, x = x_393_cast_fp16)[name = string("x_395_cast_fp16")]; bool var_11620_transpose_x_0 = const()[name = string("op_11620_transpose_x_0"), val = bool(false)]; bool var_11620_transpose_y_0 = const()[name = string("op_11620_transpose_y_0"), val = bool(true)]; tensor var_11620 = matmul(transpose_x = var_11620_transpose_x_0, transpose_y = var_11620_transpose_y_0, x = query_states_155, y = key_states_197_cast_fp16)[name = string("op_11620")]; fp16 var_11621_to_fp16 = const()[name = string("op_11621_to_fp16"), val = fp16(0x1.6ap-4)]; tensor attn_weights_77_cast_fp16 = mul(x = var_11620, y = var_11621_to_fp16)[name = string("attn_weights_77_cast_fp16")]; tensor attn_weights_79_cast_fp16 = add(x = attn_weights_77_cast_fp16, y = causal_mask)[name = string("attn_weights_79_cast_fp16")]; int32 var_11656 = const()[name = string("op_11656"), val = int32(-1)]; tensor var_11658_cast_fp16 = softmax(axis = var_11656, x = attn_weights_79_cast_fp16)[name = string("op_11658_cast_fp16")]; tensor concat_354 = const()[name = string("concat_354"), val = tensor([16, 64, 1024])]; tensor reshape_57_cast_fp16 = reshape(shape = concat_354, x = var_11658_cast_fp16)[name = string("reshape_57_cast_fp16")]; tensor concat_355 = const()[name = string("concat_355"), val = tensor([16, 1024, 128])]; tensor reshape_58_cast_fp16 = reshape(shape = concat_355, x = x_395_cast_fp16)[name = string("reshape_58_cast_fp16")]; bool matmul_19_transpose_x_0 = const()[name = string("matmul_19_transpose_x_0"), val = bool(false)]; bool matmul_19_transpose_y_0 = const()[name = string("matmul_19_transpose_y_0"), val = bool(false)]; tensor matmul_19_cast_fp16 = matmul(transpose_x = matmul_19_transpose_x_0, transpose_y = matmul_19_transpose_y_0, x = reshape_57_cast_fp16, y = reshape_58_cast_fp16)[name = string("matmul_19_cast_fp16")]; tensor concat_359 = const()[name = string("concat_359"), val = tensor([1, 16, 64, 128])]; tensor reshape_59_cast_fp16 = reshape(shape = concat_359, x = matmul_19_cast_fp16)[name = string("reshape_59_cast_fp16")]; tensor var_11670_perm_0 = const()[name = string("op_11670_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_11689 = const()[name = string("op_11689"), val = tensor([1, 64, 2048])]; tensor var_11670_cast_fp16 = transpose(perm = var_11670_perm_0, x = reshape_59_cast_fp16)[name = string("transpose_76")]; tensor attn_output_195_cast_fp16 = reshape(shape = var_11689, x = var_11670_cast_fp16)[name = string("attn_output_195_cast_fp16")]; tensor var_11694 = const()[name = string("op_11694"), val = tensor([0, 2, 1])]; string var_11710_pad_type_0 = const()[name = string("op_11710_pad_type_0"), val = string("valid")]; int32 var_11710_groups_0 = const()[name = string("op_11710_groups_0"), val = int32(1)]; tensor var_11710_strides_0 = const()[name = string("op_11710_strides_0"), val = tensor([1])]; tensor var_11710_pad_0 = const()[name = string("op_11710_pad_0"), val = tensor([0, 0])]; tensor var_11710_dilations_0 = const()[name = string("op_11710_dilations_0"), val = tensor([1])]; tensor squeeze_19_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1071286784))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1074432576))))[name = string("squeeze_19_cast_fp16_to_fp32_to_fp16_palettized")]; tensor var_11695_cast_fp16 = transpose(perm = var_11694, x = attn_output_195_cast_fp16)[name = string("transpose_75")]; tensor var_11710_cast_fp16 = conv(dilations = var_11710_dilations_0, groups = var_11710_groups_0, pad = var_11710_pad_0, pad_type = var_11710_pad_type_0, strides = var_11710_strides_0, weight = squeeze_19_cast_fp16_to_fp32_to_fp16_palettized, x = var_11695_cast_fp16)[name = string("op_11710_cast_fp16")]; tensor var_11714 = const()[name = string("op_11714"), val = tensor([0, 2, 1])]; tensor attn_output_199_cast_fp16 = transpose(perm = var_11714, x = var_11710_cast_fp16)[name = string("transpose_74")]; tensor hidden_states_119_cast_fp16 = add(x = hidden_states_115_cast_fp16, y = attn_output_199_cast_fp16)[name = string("hidden_states_119_cast_fp16")]; tensor mean_159_axes_0 = const()[name = string("mean_159_axes_0"), val = tensor([-1])]; bool mean_159_keep_dims_0 = const()[name = string("mean_159_keep_dims_0"), val = bool(true)]; tensor mean_159_cast_fp16 = reduce_mean(axes = mean_159_axes_0, keep_dims = mean_159_keep_dims_0, x = hidden_states_119_cast_fp16)[name = string("mean_159_cast_fp16")]; tensor input_353_cast_fp16 = sub(x = hidden_states_119_cast_fp16, y = mean_159_cast_fp16)[name = string("input_353_cast_fp16")]; tensor var_11733_axes_0 = const()[name = string("op_11733_axes_0"), val = tensor([-1])]; tensor model_model_layers_19_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_19_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1074465408)))]; fp16 var_11721_to_fp16 = const()[name = string("op_11721_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_11733_cast_fp16 = layer_norm(axes = var_11733_axes_0, epsilon = var_11721_to_fp16, gamma = model_model_layers_19_post_attention_layernorm_weight_to_fp16, x = input_353_cast_fp16)[name = string("op_11733_cast_fp16")]; tensor var_11747 = const()[name = string("op_11747"), val = tensor([0, 2, 1])]; tensor input_355_axes_0 = const()[name = string("input_355_axes_0"), val = tensor([2])]; tensor var_11748 = transpose(perm = var_11747, x = var_11733_cast_fp16)[name = string("transpose_73")]; tensor input_355 = expand_dims(axes = input_355_axes_0, x = var_11748)[name = string("input_355")]; string input_357_pad_type_0 = const()[name = string("input_357_pad_type_0"), val = string("valid")]; tensor input_357_strides_0 = const()[name = string("input_357_strides_0"), val = tensor([1, 1])]; tensor input_357_pad_0 = const()[name = string("input_357_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_357_dilations_0 = const()[name = string("input_357_dilations_0"), val = tensor([1, 1])]; int32 input_357_groups_0 = const()[name = string("input_357_groups_0"), val = int32(1)]; tensor input_357 = conv(dilations = input_357_dilations_0, groups = input_357_groups_0, pad = input_357_pad_0, pad_type = input_357_pad_type_0, strides = input_357_strides_0, weight = model_model_layers_19_mlp_gate_proj_weight_palettized, x = input_355)[name = string("input_357")]; string b_39_pad_type_0 = const()[name = string("b_39_pad_type_0"), val = string("valid")]; tensor b_39_strides_0 = const()[name = string("b_39_strides_0"), val = tensor([1, 1])]; tensor b_39_pad_0 = const()[name = string("b_39_pad_0"), val = tensor([0, 0, 0, 0])]; tensor b_39_dilations_0 = const()[name = string("b_39_dilations_0"), val = tensor([1, 1])]; int32 b_39_groups_0 = const()[name = string("b_39_groups_0"), val = int32(1)]; tensor b_39 = conv(dilations = b_39_dilations_0, groups = b_39_groups_0, pad = b_39_pad_0, pad_type = b_39_pad_type_0, strides = b_39_strides_0, weight = model_model_layers_19_mlp_up_proj_weight_palettized, x = input_355)[name = string("b_39")]; tensor c_39 = silu(x = input_357)[name = string("c_39")]; tensor input_359 = mul(x = c_39, y = b_39)[name = string("input_359")]; string e_39_pad_type_0 = const()[name = string("e_39_pad_type_0"), val = string("valid")]; tensor e_39_strides_0 = const()[name = string("e_39_strides_0"), val = tensor([1, 1])]; tensor e_39_pad_0 = const()[name = string("e_39_pad_0"), val = tensor([0, 0, 0, 0])]; tensor e_39_dilations_0 = const()[name = string("e_39_dilations_0"), val = tensor([1, 1])]; int32 e_39_groups_0 = const()[name = string("e_39_groups_0"), val = int32(1)]; tensor e_39 = conv(dilations = e_39_dilations_0, groups = e_39_groups_0, pad = e_39_pad_0, pad_type = e_39_pad_type_0, strides = e_39_strides_0, weight = model_model_layers_19_mlp_down_proj_weight_palettized, x = input_359)[name = string("e_39")]; tensor var_11770_axes_0 = const()[name = string("op_11770_axes_0"), val = tensor([2])]; tensor var_11770 = squeeze(axes = var_11770_axes_0, x = e_39)[name = string("op_11770")]; tensor var_11771 = const()[name = string("op_11771"), val = tensor([0, 2, 1])]; tensor var_11772 = transpose(perm = var_11771, x = var_11770)[name = string("transpose_72")]; tensor hidden_states_121_cast_fp16 = add(x = hidden_states_119_cast_fp16, y = var_11772)[name = string("hidden_states_121_cast_fp16")]; tensor mean_161_axes_0 = const()[name = string("mean_161_axes_0"), val = tensor([-1])]; bool mean_161_keep_dims_0 = const()[name = string("mean_161_keep_dims_0"), val = bool(true)]; tensor mean_161_cast_fp16 = reduce_mean(axes = mean_161_axes_0, keep_dims = mean_161_keep_dims_0, x = hidden_states_121_cast_fp16)[name = string("mean_161_cast_fp16")]; tensor input_361_cast_fp16 = sub(x = hidden_states_121_cast_fp16, y = mean_161_cast_fp16)[name = string("input_361_cast_fp16")]; tensor var_11790_axes_0 = const()[name = string("op_11790_axes_0"), val = tensor([-1])]; tensor model_model_layers_20_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_20_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1074469568)))]; fp16 var_11778_to_fp16 = const()[name = string("op_11778_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_11790_cast_fp16 = layer_norm(axes = var_11790_axes_0, epsilon = var_11778_to_fp16, gamma = model_model_layers_20_input_layernorm_weight_to_fp16, x = input_361_cast_fp16)[name = string("op_11790_cast_fp16")]; tensor var_11802 = const()[name = string("op_11802"), val = tensor([0, 2, 1])]; tensor var_11805_axes_0 = const()[name = string("op_11805_axes_0"), val = tensor([2])]; tensor var_11803 = transpose(perm = var_11802, x = var_11790_cast_fp16)[name = string("transpose_71")]; tensor var_11805 = expand_dims(axes = var_11805_axes_0, x = var_11803)[name = string("op_11805")]; string query_states_161_pad_type_0 = const()[name = string("query_states_161_pad_type_0"), val = string("valid")]; tensor query_states_161_strides_0 = const()[name = string("query_states_161_strides_0"), val = tensor([1, 1])]; tensor query_states_161_pad_0 = const()[name = string("query_states_161_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_161_dilations_0 = const()[name = string("query_states_161_dilations_0"), val = tensor([1, 1])]; int32 query_states_161_groups_0 = const()[name = string("query_states_161_groups_0"), val = int32(1)]; tensor query_states_161 = conv(dilations = query_states_161_dilations_0, groups = query_states_161_groups_0, pad = query_states_161_pad_0, pad_type = query_states_161_pad_type_0, strides = query_states_161_strides_0, weight = model_model_layers_20_self_attn_q_proj_weight_palettized, x = var_11805)[name = string("query_states_161")]; string key_states_201_pad_type_0 = const()[name = string("key_states_201_pad_type_0"), val = string("valid")]; tensor key_states_201_strides_0 = const()[name = string("key_states_201_strides_0"), val = tensor([1, 1])]; tensor key_states_201_pad_0 = const()[name = string("key_states_201_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_201_dilations_0 = const()[name = string("key_states_201_dilations_0"), val = tensor([1, 1])]; int32 key_states_201_groups_0 = const()[name = string("key_states_201_groups_0"), val = int32(1)]; tensor key_states_201 = conv(dilations = key_states_201_dilations_0, groups = key_states_201_groups_0, pad = key_states_201_pad_0, pad_type = key_states_201_pad_type_0, strides = key_states_201_strides_0, weight = model_model_layers_20_self_attn_k_proj_weight_palettized, x = var_11805)[name = string("key_states_201")]; string value_states_161_pad_type_0 = const()[name = string("value_states_161_pad_type_0"), val = string("valid")]; tensor value_states_161_strides_0 = const()[name = string("value_states_161_strides_0"), val = tensor([1, 1])]; tensor value_states_161_pad_0 = const()[name = string("value_states_161_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_161_dilations_0 = const()[name = string("value_states_161_dilations_0"), val = tensor([1, 1])]; int32 value_states_161_groups_0 = const()[name = string("value_states_161_groups_0"), val = int32(1)]; tensor value_states_161 = conv(dilations = value_states_161_dilations_0, groups = value_states_161_groups_0, pad = value_states_161_pad_0, pad_type = value_states_161_pad_type_0, strides = value_states_161_strides_0, weight = model_model_layers_20_self_attn_v_proj_weight_palettized, x = var_11805)[name = string("value_states_161")]; tensor var_11847 = const()[name = string("op_11847"), val = tensor([1, 16, 128, 64])]; tensor var_11848 = reshape(shape = var_11847, x = query_states_161)[name = string("op_11848")]; tensor var_11853 = const()[name = string("op_11853"), val = tensor([0, 1, 3, 2])]; tensor var_11858 = const()[name = string("op_11858"), val = tensor([1, 8, 128, 64])]; tensor var_11859 = reshape(shape = var_11858, x = key_states_201)[name = string("op_11859")]; tensor var_11864 = const()[name = string("op_11864"), val = tensor([0, 1, 3, 2])]; tensor var_11869 = const()[name = string("op_11869"), val = tensor([1, 8, 128, 64])]; tensor var_11870 = reshape(shape = var_11869, x = value_states_161)[name = string("op_11870")]; tensor var_11875 = const()[name = string("op_11875"), val = tensor([0, 1, 3, 2])]; tensor mean_163_axes_0 = const()[name = string("mean_163_axes_0"), val = tensor([-1])]; bool mean_163_keep_dims_0 = const()[name = string("mean_163_keep_dims_0"), val = bool(true)]; tensor x_401 = transpose(perm = var_11853, x = var_11848)[name = string("transpose_70")]; tensor mean_163 = reduce_mean(axes = mean_163_axes_0, keep_dims = mean_163_keep_dims_0, x = x_401)[name = string("mean_163")]; tensor input_365 = sub(x = x_401, y = mean_163)[name = string("input_365")]; tensor var_11892_axes_0 = const()[name = string("op_11892_axes_0"), val = tensor([-1])]; tensor model_model_layers_20_self_attn_q_norm_weight_to_fp16 = const()[name = string("model_model_layers_20_self_attn_q_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1074473728)))]; fp16 var_11880_to_fp16 = const()[name = string("op_11880_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_11892_cast_fp16 = layer_norm(axes = var_11892_axes_0, epsilon = var_11880_to_fp16, gamma = model_model_layers_20_self_attn_q_norm_weight_to_fp16, x = input_365)[name = string("op_11892_cast_fp16")]; tensor mean_165_axes_0 = const()[name = string("mean_165_axes_0"), val = tensor([-1])]; bool mean_165_keep_dims_0 = const()[name = string("mean_165_keep_dims_0"), val = bool(true)]; tensor x_403 = transpose(perm = var_11864, x = var_11859)[name = string("transpose_69")]; tensor mean_165 = reduce_mean(axes = mean_165_axes_0, keep_dims = mean_165_keep_dims_0, x = x_403)[name = string("mean_165")]; tensor input_367 = sub(x = x_403, y = mean_165)[name = string("input_367")]; tensor var_11910_axes_0 = const()[name = string("op_11910_axes_0"), val = tensor([-1])]; tensor model_model_layers_20_self_attn_k_norm_weight_to_fp16 = const()[name = string("model_model_layers_20_self_attn_k_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1074474048)))]; fp16 var_11898_to_fp16 = const()[name = string("op_11898_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_11910_cast_fp16 = layer_norm(axes = var_11910_axes_0, epsilon = var_11898_to_fp16, gamma = model_model_layers_20_self_attn_k_norm_weight_to_fp16, x = input_367)[name = string("op_11910_cast_fp16")]; tensor var_11925 = mul(x = var_11892_cast_fp16, y = cos_5)[name = string("op_11925")]; tensor x1_81_begin_0 = const()[name = string("x1_81_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_81_end_0 = const()[name = string("x1_81_end_0"), val = tensor([1, 16, 64, 64])]; tensor x1_81_end_mask_0 = const()[name = string("x1_81_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_81 = slice_by_index(begin = x1_81_begin_0, end = x1_81_end_0, end_mask = x1_81_end_mask_0, x = var_11892_cast_fp16)[name = string("x1_81")]; tensor x2_81_begin_0 = const()[name = string("x2_81_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_81_end_0 = const()[name = string("x2_81_end_0"), val = tensor([1, 16, 64, 128])]; tensor x2_81_end_mask_0 = const()[name = string("x2_81_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_81 = slice_by_index(begin = x2_81_begin_0, end = x2_81_end_0, end_mask = x2_81_end_mask_0, x = var_11892_cast_fp16)[name = string("x2_81")]; fp16 const_447_promoted = const()[name = string("const_447_promoted"), val = fp16(-0x1p+0)]; tensor var_11946 = mul(x = x2_81, y = const_447_promoted)[name = string("op_11946")]; int32 var_11948 = const()[name = string("op_11948"), val = int32(-1)]; bool var_11949_interleave_0 = const()[name = string("op_11949_interleave_0"), val = bool(false)]; tensor var_11949 = concat(axis = var_11948, interleave = var_11949_interleave_0, values = (var_11946, x1_81))[name = string("op_11949")]; tensor var_11950 = mul(x = var_11949, y = sin_5)[name = string("op_11950")]; tensor query_states_163 = add(x = var_11925, y = var_11950)[name = string("query_states_163")]; tensor var_11953 = mul(x = var_11910_cast_fp16, y = cos_5)[name = string("op_11953")]; tensor x1_83_begin_0 = const()[name = string("x1_83_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_83_end_0 = const()[name = string("x1_83_end_0"), val = tensor([1, 8, 64, 64])]; tensor x1_83_end_mask_0 = const()[name = string("x1_83_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_83 = slice_by_index(begin = x1_83_begin_0, end = x1_83_end_0, end_mask = x1_83_end_mask_0, x = var_11910_cast_fp16)[name = string("x1_83")]; tensor x2_83_begin_0 = const()[name = string("x2_83_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_83_end_0 = const()[name = string("x2_83_end_0"), val = tensor([1, 8, 64, 128])]; tensor x2_83_end_mask_0 = const()[name = string("x2_83_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_83 = slice_by_index(begin = x2_83_begin_0, end = x2_83_end_0, end_mask = x2_83_end_mask_0, x = var_11910_cast_fp16)[name = string("x2_83")]; fp16 const_450_promoted = const()[name = string("const_450_promoted"), val = fp16(-0x1p+0)]; tensor var_11974 = mul(x = x2_83, y = const_450_promoted)[name = string("op_11974")]; int32 var_11976 = const()[name = string("op_11976"), val = int32(-1)]; bool var_11977_interleave_0 = const()[name = string("op_11977_interleave_0"), val = bool(false)]; tensor var_11977 = concat(axis = var_11976, interleave = var_11977_interleave_0, values = (var_11974, x1_83))[name = string("op_11977")]; tensor var_11978 = mul(x = var_11977, y = sin_5)[name = string("op_11978")]; tensor key_states_203 = add(x = var_11953, y = var_11978)[name = string("key_states_203")]; tensor expand_dims_240 = const()[name = string("expand_dims_240"), val = tensor([20])]; tensor expand_dims_241 = const()[name = string("expand_dims_241"), val = tensor([0])]; tensor expand_dims_243 = const()[name = string("expand_dims_243"), val = tensor([0])]; tensor expand_dims_244 = const()[name = string("expand_dims_244"), val = tensor([21])]; int32 concat_362_axis_0 = const()[name = string("concat_362_axis_0"), val = int32(0)]; bool concat_362_interleave_0 = const()[name = string("concat_362_interleave_0"), val = bool(false)]; tensor concat_362 = concat(axis = concat_362_axis_0, interleave = concat_362_interleave_0, values = (expand_dims_240, expand_dims_241, current_pos, expand_dims_243))[name = string("concat_362")]; tensor concat_363_values1_0 = const()[name = string("concat_363_values1_0"), val = tensor([0])]; tensor concat_363_values3_0 = const()[name = string("concat_363_values3_0"), val = tensor([0])]; int32 concat_363_axis_0 = const()[name = string("concat_363_axis_0"), val = int32(0)]; bool concat_363_interleave_0 = const()[name = string("concat_363_interleave_0"), val = bool(false)]; tensor concat_363 = concat(axis = concat_363_axis_0, interleave = concat_363_interleave_0, values = (expand_dims_244, concat_363_values1_0, var_1760, concat_363_values3_0))[name = string("concat_363")]; tensor model_model_kv_cache_0_internal_tensor_assign_41_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_41_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_41_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_41_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_41_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_41_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_41_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_41_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_41_cast_fp16 = slice_update(begin = concat_362, begin_mask = model_model_kv_cache_0_internal_tensor_assign_41_begin_mask_0, end = concat_363, end_mask = model_model_kv_cache_0_internal_tensor_assign_41_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_41_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_41_stride_0, update = key_states_203, x = coreml_update_state_95)[name = string("model_model_kv_cache_0_internal_tensor_assign_41_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_41_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_96_write_state")]; tensor coreml_update_state_96 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_96")]; tensor expand_dims_246 = const()[name = string("expand_dims_246"), val = tensor([48])]; tensor expand_dims_247 = const()[name = string("expand_dims_247"), val = tensor([0])]; tensor expand_dims_249 = const()[name = string("expand_dims_249"), val = tensor([0])]; tensor expand_dims_250 = const()[name = string("expand_dims_250"), val = tensor([49])]; int32 concat_366_axis_0 = const()[name = string("concat_366_axis_0"), val = int32(0)]; bool concat_366_interleave_0 = const()[name = string("concat_366_interleave_0"), val = bool(false)]; tensor concat_366 = concat(axis = concat_366_axis_0, interleave = concat_366_interleave_0, values = (expand_dims_246, expand_dims_247, current_pos, expand_dims_249))[name = string("concat_366")]; tensor concat_367_values1_0 = const()[name = string("concat_367_values1_0"), val = tensor([0])]; tensor concat_367_values3_0 = const()[name = string("concat_367_values3_0"), val = tensor([0])]; int32 concat_367_axis_0 = const()[name = string("concat_367_axis_0"), val = int32(0)]; bool concat_367_interleave_0 = const()[name = string("concat_367_interleave_0"), val = bool(false)]; tensor concat_367 = concat(axis = concat_367_axis_0, interleave = concat_367_interleave_0, values = (expand_dims_250, concat_367_values1_0, var_1760, concat_367_values3_0))[name = string("concat_367")]; tensor model_model_kv_cache_0_internal_tensor_assign_42_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_42_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_42_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_42_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_42_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_42_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_42_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_42_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_163 = transpose(perm = var_11875, x = var_11870)[name = string("transpose_68")]; tensor model_model_kv_cache_0_internal_tensor_assign_42_cast_fp16 = slice_update(begin = concat_366, begin_mask = model_model_kv_cache_0_internal_tensor_assign_42_begin_mask_0, end = concat_367, end_mask = model_model_kv_cache_0_internal_tensor_assign_42_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_42_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_42_stride_0, update = value_states_163, x = coreml_update_state_96)[name = string("model_model_kv_cache_0_internal_tensor_assign_42_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_42_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_97_write_state")]; tensor coreml_update_state_97 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_97")]; tensor var_12049_begin_0 = const()[name = string("op_12049_begin_0"), val = tensor([20, 0, 0, 0])]; tensor var_12049_end_0 = const()[name = string("op_12049_end_0"), val = tensor([21, 8, 1024, 128])]; tensor var_12049_end_mask_0 = const()[name = string("op_12049_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_12049_cast_fp16 = slice_by_index(begin = var_12049_begin_0, end = var_12049_end_0, end_mask = var_12049_end_mask_0, x = coreml_update_state_97)[name = string("op_12049_cast_fp16")]; tensor K_layer_cache_41_axes_0 = const()[name = string("K_layer_cache_41_axes_0"), val = tensor([0])]; tensor K_layer_cache_41_cast_fp16 = squeeze(axes = K_layer_cache_41_axes_0, x = var_12049_cast_fp16)[name = string("K_layer_cache_41_cast_fp16")]; tensor var_12056_begin_0 = const()[name = string("op_12056_begin_0"), val = tensor([48, 0, 0, 0])]; tensor var_12056_end_0 = const()[name = string("op_12056_end_0"), val = tensor([49, 8, 1024, 128])]; tensor var_12056_end_mask_0 = const()[name = string("op_12056_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_12056_cast_fp16 = slice_by_index(begin = var_12056_begin_0, end = var_12056_end_0, end_mask = var_12056_end_mask_0, x = coreml_update_state_97)[name = string("op_12056_cast_fp16")]; tensor V_layer_cache_41_axes_0 = const()[name = string("V_layer_cache_41_axes_0"), val = tensor([0])]; tensor V_layer_cache_41_cast_fp16 = squeeze(axes = V_layer_cache_41_axes_0, x = var_12056_cast_fp16)[name = string("V_layer_cache_41_cast_fp16")]; tensor x_407_axes_0 = const()[name = string("x_407_axes_0"), val = tensor([1])]; tensor x_407_cast_fp16 = expand_dims(axes = x_407_axes_0, x = K_layer_cache_41_cast_fp16)[name = string("x_407_cast_fp16")]; tensor var_12085 = const()[name = string("op_12085"), val = tensor([1, 2, 1, 1])]; tensor x_409_cast_fp16 = tile(reps = var_12085, x = x_407_cast_fp16)[name = string("x_409_cast_fp16")]; tensor var_12097 = const()[name = string("op_12097"), val = tensor([1, -1, 1024, 128])]; tensor key_states_207_cast_fp16 = reshape(shape = var_12097, x = x_409_cast_fp16)[name = string("key_states_207_cast_fp16")]; tensor x_413_axes_0 = const()[name = string("x_413_axes_0"), val = tensor([1])]; tensor x_413_cast_fp16 = expand_dims(axes = x_413_axes_0, x = V_layer_cache_41_cast_fp16)[name = string("x_413_cast_fp16")]; tensor var_12105 = const()[name = string("op_12105"), val = tensor([1, 2, 1, 1])]; tensor x_415_cast_fp16 = tile(reps = var_12105, x = x_413_cast_fp16)[name = string("x_415_cast_fp16")]; bool var_12132_transpose_x_0 = const()[name = string("op_12132_transpose_x_0"), val = bool(false)]; bool var_12132_transpose_y_0 = const()[name = string("op_12132_transpose_y_0"), val = bool(true)]; tensor var_12132 = matmul(transpose_x = var_12132_transpose_x_0, transpose_y = var_12132_transpose_y_0, x = query_states_163, y = key_states_207_cast_fp16)[name = string("op_12132")]; fp16 var_12133_to_fp16 = const()[name = string("op_12133_to_fp16"), val = fp16(0x1.6ap-4)]; tensor attn_weights_81_cast_fp16 = mul(x = var_12132, y = var_12133_to_fp16)[name = string("attn_weights_81_cast_fp16")]; tensor attn_weights_83_cast_fp16 = add(x = attn_weights_81_cast_fp16, y = causal_mask)[name = string("attn_weights_83_cast_fp16")]; int32 var_12168 = const()[name = string("op_12168"), val = int32(-1)]; tensor var_12170_cast_fp16 = softmax(axis = var_12168, x = attn_weights_83_cast_fp16)[name = string("op_12170_cast_fp16")]; tensor concat_372 = const()[name = string("concat_372"), val = tensor([16, 64, 1024])]; tensor reshape_60_cast_fp16 = reshape(shape = concat_372, x = var_12170_cast_fp16)[name = string("reshape_60_cast_fp16")]; tensor concat_373 = const()[name = string("concat_373"), val = tensor([16, 1024, 128])]; tensor reshape_61_cast_fp16 = reshape(shape = concat_373, x = x_415_cast_fp16)[name = string("reshape_61_cast_fp16")]; bool matmul_20_transpose_x_0 = const()[name = string("matmul_20_transpose_x_0"), val = bool(false)]; bool matmul_20_transpose_y_0 = const()[name = string("matmul_20_transpose_y_0"), val = bool(false)]; tensor matmul_20_cast_fp16 = matmul(transpose_x = matmul_20_transpose_x_0, transpose_y = matmul_20_transpose_y_0, x = reshape_60_cast_fp16, y = reshape_61_cast_fp16)[name = string("matmul_20_cast_fp16")]; tensor concat_377 = const()[name = string("concat_377"), val = tensor([1, 16, 64, 128])]; tensor reshape_62_cast_fp16 = reshape(shape = concat_377, x = matmul_20_cast_fp16)[name = string("reshape_62_cast_fp16")]; tensor var_12182_perm_0 = const()[name = string("op_12182_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_12201 = const()[name = string("op_12201"), val = tensor([1, 64, 2048])]; tensor var_12182_cast_fp16 = transpose(perm = var_12182_perm_0, x = reshape_62_cast_fp16)[name = string("transpose_67")]; tensor attn_output_205_cast_fp16 = reshape(shape = var_12201, x = var_12182_cast_fp16)[name = string("attn_output_205_cast_fp16")]; tensor var_12206 = const()[name = string("op_12206"), val = tensor([0, 2, 1])]; string var_12222_pad_type_0 = const()[name = string("op_12222_pad_type_0"), val = string("valid")]; int32 var_12222_groups_0 = const()[name = string("op_12222_groups_0"), val = int32(1)]; tensor var_12222_strides_0 = const()[name = string("op_12222_strides_0"), val = tensor([1])]; tensor var_12222_pad_0 = const()[name = string("op_12222_pad_0"), val = tensor([0, 0])]; tensor var_12222_dilations_0 = const()[name = string("op_12222_dilations_0"), val = tensor([1])]; tensor squeeze_20_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1074474368))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1077620160))))[name = string("squeeze_20_cast_fp16_to_fp32_to_fp16_palettized")]; tensor var_12207_cast_fp16 = transpose(perm = var_12206, x = attn_output_205_cast_fp16)[name = string("transpose_66")]; tensor var_12222_cast_fp16 = conv(dilations = var_12222_dilations_0, groups = var_12222_groups_0, pad = var_12222_pad_0, pad_type = var_12222_pad_type_0, strides = var_12222_strides_0, weight = squeeze_20_cast_fp16_to_fp32_to_fp16_palettized, x = var_12207_cast_fp16)[name = string("op_12222_cast_fp16")]; tensor var_12226 = const()[name = string("op_12226"), val = tensor([0, 2, 1])]; tensor attn_output_209_cast_fp16 = transpose(perm = var_12226, x = var_12222_cast_fp16)[name = string("transpose_65")]; tensor hidden_states_125_cast_fp16 = add(x = hidden_states_121_cast_fp16, y = attn_output_209_cast_fp16)[name = string("hidden_states_125_cast_fp16")]; tensor mean_167_axes_0 = const()[name = string("mean_167_axes_0"), val = tensor([-1])]; bool mean_167_keep_dims_0 = const()[name = string("mean_167_keep_dims_0"), val = bool(true)]; tensor mean_167_cast_fp16 = reduce_mean(axes = mean_167_axes_0, keep_dims = mean_167_keep_dims_0, x = hidden_states_125_cast_fp16)[name = string("mean_167_cast_fp16")]; tensor input_371_cast_fp16 = sub(x = hidden_states_125_cast_fp16, y = mean_167_cast_fp16)[name = string("input_371_cast_fp16")]; tensor var_12245_axes_0 = const()[name = string("op_12245_axes_0"), val = tensor([-1])]; tensor model_model_layers_20_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_20_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1077652992)))]; fp16 var_12233_to_fp16 = const()[name = string("op_12233_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_12245_cast_fp16 = layer_norm(axes = var_12245_axes_0, epsilon = var_12233_to_fp16, gamma = model_model_layers_20_post_attention_layernorm_weight_to_fp16, x = input_371_cast_fp16)[name = string("op_12245_cast_fp16")]; tensor var_12259 = const()[name = string("op_12259"), val = tensor([0, 2, 1])]; tensor input_373_axes_0 = const()[name = string("input_373_axes_0"), val = tensor([2])]; tensor var_12260 = transpose(perm = var_12259, x = var_12245_cast_fp16)[name = string("transpose_64")]; tensor input_373 = expand_dims(axes = input_373_axes_0, x = var_12260)[name = string("input_373")]; string input_375_pad_type_0 = const()[name = string("input_375_pad_type_0"), val = string("valid")]; tensor input_375_strides_0 = const()[name = string("input_375_strides_0"), val = tensor([1, 1])]; tensor input_375_pad_0 = const()[name = string("input_375_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_375_dilations_0 = const()[name = string("input_375_dilations_0"), val = tensor([1, 1])]; int32 input_375_groups_0 = const()[name = string("input_375_groups_0"), val = int32(1)]; tensor input_375 = conv(dilations = input_375_dilations_0, groups = input_375_groups_0, pad = input_375_pad_0, pad_type = input_375_pad_type_0, strides = input_375_strides_0, weight = model_model_layers_20_mlp_gate_proj_weight_palettized, x = input_373)[name = string("input_375")]; string b_41_pad_type_0 = const()[name = string("b_41_pad_type_0"), val = string("valid")]; tensor b_41_strides_0 = const()[name = string("b_41_strides_0"), val = tensor([1, 1])]; tensor b_41_pad_0 = const()[name = string("b_41_pad_0"), val = tensor([0, 0, 0, 0])]; tensor b_41_dilations_0 = const()[name = string("b_41_dilations_0"), val = tensor([1, 1])]; int32 b_41_groups_0 = const()[name = string("b_41_groups_0"), val = int32(1)]; tensor b_41 = conv(dilations = b_41_dilations_0, groups = b_41_groups_0, pad = b_41_pad_0, pad_type = b_41_pad_type_0, strides = b_41_strides_0, weight = model_model_layers_20_mlp_up_proj_weight_palettized, x = input_373)[name = string("b_41")]; tensor c_41 = silu(x = input_375)[name = string("c_41")]; tensor input_377 = mul(x = c_41, y = b_41)[name = string("input_377")]; string e_41_pad_type_0 = const()[name = string("e_41_pad_type_0"), val = string("valid")]; tensor e_41_strides_0 = const()[name = string("e_41_strides_0"), val = tensor([1, 1])]; tensor e_41_pad_0 = const()[name = string("e_41_pad_0"), val = tensor([0, 0, 0, 0])]; tensor e_41_dilations_0 = const()[name = string("e_41_dilations_0"), val = tensor([1, 1])]; int32 e_41_groups_0 = const()[name = string("e_41_groups_0"), val = int32(1)]; tensor e_41 = conv(dilations = e_41_dilations_0, groups = e_41_groups_0, pad = e_41_pad_0, pad_type = e_41_pad_type_0, strides = e_41_strides_0, weight = model_model_layers_20_mlp_down_proj_weight_palettized, x = input_377)[name = string("e_41")]; tensor var_12282_axes_0 = const()[name = string("op_12282_axes_0"), val = tensor([2])]; tensor var_12282 = squeeze(axes = var_12282_axes_0, x = e_41)[name = string("op_12282")]; tensor var_12283 = const()[name = string("op_12283"), val = tensor([0, 2, 1])]; tensor var_12284 = transpose(perm = var_12283, x = var_12282)[name = string("transpose_63")]; tensor hidden_states_127_cast_fp16 = add(x = hidden_states_125_cast_fp16, y = var_12284)[name = string("hidden_states_127_cast_fp16")]; tensor mean_169_axes_0 = const()[name = string("mean_169_axes_0"), val = tensor([-1])]; bool mean_169_keep_dims_0 = const()[name = string("mean_169_keep_dims_0"), val = bool(true)]; tensor mean_169_cast_fp16 = reduce_mean(axes = mean_169_axes_0, keep_dims = mean_169_keep_dims_0, x = hidden_states_127_cast_fp16)[name = string("mean_169_cast_fp16")]; tensor input_379_cast_fp16 = sub(x = hidden_states_127_cast_fp16, y = mean_169_cast_fp16)[name = string("input_379_cast_fp16")]; tensor var_12302_axes_0 = const()[name = string("op_12302_axes_0"), val = tensor([-1])]; tensor model_model_layers_21_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_21_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1077657152)))]; fp16 var_12290_to_fp16 = const()[name = string("op_12290_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_12302_cast_fp16 = layer_norm(axes = var_12302_axes_0, epsilon = var_12290_to_fp16, gamma = model_model_layers_21_input_layernorm_weight_to_fp16, x = input_379_cast_fp16)[name = string("op_12302_cast_fp16")]; tensor var_12314 = const()[name = string("op_12314"), val = tensor([0, 2, 1])]; tensor var_12317_axes_0 = const()[name = string("op_12317_axes_0"), val = tensor([2])]; tensor var_12315 = transpose(perm = var_12314, x = var_12302_cast_fp16)[name = string("transpose_62")]; tensor var_12317 = expand_dims(axes = var_12317_axes_0, x = var_12315)[name = string("op_12317")]; string query_states_169_pad_type_0 = const()[name = string("query_states_169_pad_type_0"), val = string("valid")]; tensor query_states_169_strides_0 = const()[name = string("query_states_169_strides_0"), val = tensor([1, 1])]; tensor query_states_169_pad_0 = const()[name = string("query_states_169_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_169_dilations_0 = const()[name = string("query_states_169_dilations_0"), val = tensor([1, 1])]; int32 query_states_169_groups_0 = const()[name = string("query_states_169_groups_0"), val = int32(1)]; tensor query_states_169 = conv(dilations = query_states_169_dilations_0, groups = query_states_169_groups_0, pad = query_states_169_pad_0, pad_type = query_states_169_pad_type_0, strides = query_states_169_strides_0, weight = model_model_layers_21_self_attn_q_proj_weight_palettized, x = var_12317)[name = string("query_states_169")]; string key_states_211_pad_type_0 = const()[name = string("key_states_211_pad_type_0"), val = string("valid")]; tensor key_states_211_strides_0 = const()[name = string("key_states_211_strides_0"), val = tensor([1, 1])]; tensor key_states_211_pad_0 = const()[name = string("key_states_211_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_211_dilations_0 = const()[name = string("key_states_211_dilations_0"), val = tensor([1, 1])]; int32 key_states_211_groups_0 = const()[name = string("key_states_211_groups_0"), val = int32(1)]; tensor key_states_211 = conv(dilations = key_states_211_dilations_0, groups = key_states_211_groups_0, pad = key_states_211_pad_0, pad_type = key_states_211_pad_type_0, strides = key_states_211_strides_0, weight = model_model_layers_21_self_attn_k_proj_weight_palettized, x = var_12317)[name = string("key_states_211")]; string value_states_169_pad_type_0 = const()[name = string("value_states_169_pad_type_0"), val = string("valid")]; tensor value_states_169_strides_0 = const()[name = string("value_states_169_strides_0"), val = tensor([1, 1])]; tensor value_states_169_pad_0 = const()[name = string("value_states_169_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_169_dilations_0 = const()[name = string("value_states_169_dilations_0"), val = tensor([1, 1])]; int32 value_states_169_groups_0 = const()[name = string("value_states_169_groups_0"), val = int32(1)]; tensor value_states_169 = conv(dilations = value_states_169_dilations_0, groups = value_states_169_groups_0, pad = value_states_169_pad_0, pad_type = value_states_169_pad_type_0, strides = value_states_169_strides_0, weight = model_model_layers_21_self_attn_v_proj_weight_palettized, x = var_12317)[name = string("value_states_169")]; tensor var_12359 = const()[name = string("op_12359"), val = tensor([1, 16, 128, 64])]; tensor var_12360 = reshape(shape = var_12359, x = query_states_169)[name = string("op_12360")]; tensor var_12365 = const()[name = string("op_12365"), val = tensor([0, 1, 3, 2])]; tensor var_12370 = const()[name = string("op_12370"), val = tensor([1, 8, 128, 64])]; tensor var_12371 = reshape(shape = var_12370, x = key_states_211)[name = string("op_12371")]; tensor var_12376 = const()[name = string("op_12376"), val = tensor([0, 1, 3, 2])]; tensor var_12381 = const()[name = string("op_12381"), val = tensor([1, 8, 128, 64])]; tensor var_12382 = reshape(shape = var_12381, x = value_states_169)[name = string("op_12382")]; tensor var_12387 = const()[name = string("op_12387"), val = tensor([0, 1, 3, 2])]; tensor mean_171_axes_0 = const()[name = string("mean_171_axes_0"), val = tensor([-1])]; bool mean_171_keep_dims_0 = const()[name = string("mean_171_keep_dims_0"), val = bool(true)]; tensor x_421 = transpose(perm = var_12365, x = var_12360)[name = string("transpose_61")]; tensor mean_171 = reduce_mean(axes = mean_171_axes_0, keep_dims = mean_171_keep_dims_0, x = x_421)[name = string("mean_171")]; tensor input_383 = sub(x = x_421, y = mean_171)[name = string("input_383")]; tensor var_12404_axes_0 = const()[name = string("op_12404_axes_0"), val = tensor([-1])]; tensor model_model_layers_21_self_attn_q_norm_weight_to_fp16 = const()[name = string("model_model_layers_21_self_attn_q_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1077661312)))]; fp16 var_12392_to_fp16 = const()[name = string("op_12392_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_12404_cast_fp16 = layer_norm(axes = var_12404_axes_0, epsilon = var_12392_to_fp16, gamma = model_model_layers_21_self_attn_q_norm_weight_to_fp16, x = input_383)[name = string("op_12404_cast_fp16")]; tensor mean_173_axes_0 = const()[name = string("mean_173_axes_0"), val = tensor([-1])]; bool mean_173_keep_dims_0 = const()[name = string("mean_173_keep_dims_0"), val = bool(true)]; tensor x_423 = transpose(perm = var_12376, x = var_12371)[name = string("transpose_60")]; tensor mean_173 = reduce_mean(axes = mean_173_axes_0, keep_dims = mean_173_keep_dims_0, x = x_423)[name = string("mean_173")]; tensor input_385 = sub(x = x_423, y = mean_173)[name = string("input_385")]; tensor var_12422_axes_0 = const()[name = string("op_12422_axes_0"), val = tensor([-1])]; tensor model_model_layers_21_self_attn_k_norm_weight_to_fp16 = const()[name = string("model_model_layers_21_self_attn_k_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1077661632)))]; fp16 var_12410_to_fp16 = const()[name = string("op_12410_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_12422_cast_fp16 = layer_norm(axes = var_12422_axes_0, epsilon = var_12410_to_fp16, gamma = model_model_layers_21_self_attn_k_norm_weight_to_fp16, x = input_385)[name = string("op_12422_cast_fp16")]; tensor var_12437 = mul(x = var_12404_cast_fp16, y = cos_5)[name = string("op_12437")]; tensor x1_85_begin_0 = const()[name = string("x1_85_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_85_end_0 = const()[name = string("x1_85_end_0"), val = tensor([1, 16, 64, 64])]; tensor x1_85_end_mask_0 = const()[name = string("x1_85_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_85 = slice_by_index(begin = x1_85_begin_0, end = x1_85_end_0, end_mask = x1_85_end_mask_0, x = var_12404_cast_fp16)[name = string("x1_85")]; tensor x2_85_begin_0 = const()[name = string("x2_85_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_85_end_0 = const()[name = string("x2_85_end_0"), val = tensor([1, 16, 64, 128])]; tensor x2_85_end_mask_0 = const()[name = string("x2_85_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_85 = slice_by_index(begin = x2_85_begin_0, end = x2_85_end_0, end_mask = x2_85_end_mask_0, x = var_12404_cast_fp16)[name = string("x2_85")]; fp16 const_469_promoted = const()[name = string("const_469_promoted"), val = fp16(-0x1p+0)]; tensor var_12458 = mul(x = x2_85, y = const_469_promoted)[name = string("op_12458")]; int32 var_12460 = const()[name = string("op_12460"), val = int32(-1)]; bool var_12461_interleave_0 = const()[name = string("op_12461_interleave_0"), val = bool(false)]; tensor var_12461 = concat(axis = var_12460, interleave = var_12461_interleave_0, values = (var_12458, x1_85))[name = string("op_12461")]; tensor var_12462 = mul(x = var_12461, y = sin_5)[name = string("op_12462")]; tensor query_states_171 = add(x = var_12437, y = var_12462)[name = string("query_states_171")]; tensor var_12465 = mul(x = var_12422_cast_fp16, y = cos_5)[name = string("op_12465")]; tensor x1_87_begin_0 = const()[name = string("x1_87_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_87_end_0 = const()[name = string("x1_87_end_0"), val = tensor([1, 8, 64, 64])]; tensor x1_87_end_mask_0 = const()[name = string("x1_87_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_87 = slice_by_index(begin = x1_87_begin_0, end = x1_87_end_0, end_mask = x1_87_end_mask_0, x = var_12422_cast_fp16)[name = string("x1_87")]; tensor x2_87_begin_0 = const()[name = string("x2_87_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_87_end_0 = const()[name = string("x2_87_end_0"), val = tensor([1, 8, 64, 128])]; tensor x2_87_end_mask_0 = const()[name = string("x2_87_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_87 = slice_by_index(begin = x2_87_begin_0, end = x2_87_end_0, end_mask = x2_87_end_mask_0, x = var_12422_cast_fp16)[name = string("x2_87")]; fp16 const_472_promoted = const()[name = string("const_472_promoted"), val = fp16(-0x1p+0)]; tensor var_12486 = mul(x = x2_87, y = const_472_promoted)[name = string("op_12486")]; int32 var_12488 = const()[name = string("op_12488"), val = int32(-1)]; bool var_12489_interleave_0 = const()[name = string("op_12489_interleave_0"), val = bool(false)]; tensor var_12489 = concat(axis = var_12488, interleave = var_12489_interleave_0, values = (var_12486, x1_87))[name = string("op_12489")]; tensor var_12490 = mul(x = var_12489, y = sin_5)[name = string("op_12490")]; tensor key_states_213 = add(x = var_12465, y = var_12490)[name = string("key_states_213")]; tensor expand_dims_252 = const()[name = string("expand_dims_252"), val = tensor([21])]; tensor expand_dims_253 = const()[name = string("expand_dims_253"), val = tensor([0])]; tensor expand_dims_255 = const()[name = string("expand_dims_255"), val = tensor([0])]; tensor expand_dims_256 = const()[name = string("expand_dims_256"), val = tensor([22])]; int32 concat_380_axis_0 = const()[name = string("concat_380_axis_0"), val = int32(0)]; bool concat_380_interleave_0 = const()[name = string("concat_380_interleave_0"), val = bool(false)]; tensor concat_380 = concat(axis = concat_380_axis_0, interleave = concat_380_interleave_0, values = (expand_dims_252, expand_dims_253, current_pos, expand_dims_255))[name = string("concat_380")]; tensor concat_381_values1_0 = const()[name = string("concat_381_values1_0"), val = tensor([0])]; tensor concat_381_values3_0 = const()[name = string("concat_381_values3_0"), val = tensor([0])]; int32 concat_381_axis_0 = const()[name = string("concat_381_axis_0"), val = int32(0)]; bool concat_381_interleave_0 = const()[name = string("concat_381_interleave_0"), val = bool(false)]; tensor concat_381 = concat(axis = concat_381_axis_0, interleave = concat_381_interleave_0, values = (expand_dims_256, concat_381_values1_0, var_1760, concat_381_values3_0))[name = string("concat_381")]; tensor model_model_kv_cache_0_internal_tensor_assign_43_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_43_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_43_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_43_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_43_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_43_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_43_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_43_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_43_cast_fp16 = slice_update(begin = concat_380, begin_mask = model_model_kv_cache_0_internal_tensor_assign_43_begin_mask_0, end = concat_381, end_mask = model_model_kv_cache_0_internal_tensor_assign_43_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_43_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_43_stride_0, update = key_states_213, x = coreml_update_state_97)[name = string("model_model_kv_cache_0_internal_tensor_assign_43_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_43_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_98_write_state")]; tensor coreml_update_state_98 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_98")]; tensor expand_dims_258 = const()[name = string("expand_dims_258"), val = tensor([49])]; tensor expand_dims_259 = const()[name = string("expand_dims_259"), val = tensor([0])]; tensor expand_dims_261 = const()[name = string("expand_dims_261"), val = tensor([0])]; tensor expand_dims_262 = const()[name = string("expand_dims_262"), val = tensor([50])]; int32 concat_384_axis_0 = const()[name = string("concat_384_axis_0"), val = int32(0)]; bool concat_384_interleave_0 = const()[name = string("concat_384_interleave_0"), val = bool(false)]; tensor concat_384 = concat(axis = concat_384_axis_0, interleave = concat_384_interleave_0, values = (expand_dims_258, expand_dims_259, current_pos, expand_dims_261))[name = string("concat_384")]; tensor concat_385_values1_0 = const()[name = string("concat_385_values1_0"), val = tensor([0])]; tensor concat_385_values3_0 = const()[name = string("concat_385_values3_0"), val = tensor([0])]; int32 concat_385_axis_0 = const()[name = string("concat_385_axis_0"), val = int32(0)]; bool concat_385_interleave_0 = const()[name = string("concat_385_interleave_0"), val = bool(false)]; tensor concat_385 = concat(axis = concat_385_axis_0, interleave = concat_385_interleave_0, values = (expand_dims_262, concat_385_values1_0, var_1760, concat_385_values3_0))[name = string("concat_385")]; tensor model_model_kv_cache_0_internal_tensor_assign_44_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_44_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_44_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_44_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_44_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_44_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_44_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_44_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_171 = transpose(perm = var_12387, x = var_12382)[name = string("transpose_59")]; tensor model_model_kv_cache_0_internal_tensor_assign_44_cast_fp16 = slice_update(begin = concat_384, begin_mask = model_model_kv_cache_0_internal_tensor_assign_44_begin_mask_0, end = concat_385, end_mask = model_model_kv_cache_0_internal_tensor_assign_44_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_44_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_44_stride_0, update = value_states_171, x = coreml_update_state_98)[name = string("model_model_kv_cache_0_internal_tensor_assign_44_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_44_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_99_write_state")]; tensor coreml_update_state_99 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_99")]; tensor var_12561_begin_0 = const()[name = string("op_12561_begin_0"), val = tensor([21, 0, 0, 0])]; tensor var_12561_end_0 = const()[name = string("op_12561_end_0"), val = tensor([22, 8, 1024, 128])]; tensor var_12561_end_mask_0 = const()[name = string("op_12561_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_12561_cast_fp16 = slice_by_index(begin = var_12561_begin_0, end = var_12561_end_0, end_mask = var_12561_end_mask_0, x = coreml_update_state_99)[name = string("op_12561_cast_fp16")]; tensor K_layer_cache_43_axes_0 = const()[name = string("K_layer_cache_43_axes_0"), val = tensor([0])]; tensor K_layer_cache_43_cast_fp16 = squeeze(axes = K_layer_cache_43_axes_0, x = var_12561_cast_fp16)[name = string("K_layer_cache_43_cast_fp16")]; tensor var_12568_begin_0 = const()[name = string("op_12568_begin_0"), val = tensor([49, 0, 0, 0])]; tensor var_12568_end_0 = const()[name = string("op_12568_end_0"), val = tensor([50, 8, 1024, 128])]; tensor var_12568_end_mask_0 = const()[name = string("op_12568_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_12568_cast_fp16 = slice_by_index(begin = var_12568_begin_0, end = var_12568_end_0, end_mask = var_12568_end_mask_0, x = coreml_update_state_99)[name = string("op_12568_cast_fp16")]; tensor V_layer_cache_43_axes_0 = const()[name = string("V_layer_cache_43_axes_0"), val = tensor([0])]; tensor V_layer_cache_43_cast_fp16 = squeeze(axes = V_layer_cache_43_axes_0, x = var_12568_cast_fp16)[name = string("V_layer_cache_43_cast_fp16")]; tensor x_427_axes_0 = const()[name = string("x_427_axes_0"), val = tensor([1])]; tensor x_427_cast_fp16 = expand_dims(axes = x_427_axes_0, x = K_layer_cache_43_cast_fp16)[name = string("x_427_cast_fp16")]; tensor var_12597 = const()[name = string("op_12597"), val = tensor([1, 2, 1, 1])]; tensor x_429_cast_fp16 = tile(reps = var_12597, x = x_427_cast_fp16)[name = string("x_429_cast_fp16")]; tensor var_12609 = const()[name = string("op_12609"), val = tensor([1, -1, 1024, 128])]; tensor key_states_217_cast_fp16 = reshape(shape = var_12609, x = x_429_cast_fp16)[name = string("key_states_217_cast_fp16")]; tensor x_433_axes_0 = const()[name = string("x_433_axes_0"), val = tensor([1])]; tensor x_433_cast_fp16 = expand_dims(axes = x_433_axes_0, x = V_layer_cache_43_cast_fp16)[name = string("x_433_cast_fp16")]; tensor var_12617 = const()[name = string("op_12617"), val = tensor([1, 2, 1, 1])]; tensor x_435_cast_fp16 = tile(reps = var_12617, x = x_433_cast_fp16)[name = string("x_435_cast_fp16")]; bool var_12644_transpose_x_0 = const()[name = string("op_12644_transpose_x_0"), val = bool(false)]; bool var_12644_transpose_y_0 = const()[name = string("op_12644_transpose_y_0"), val = bool(true)]; tensor var_12644 = matmul(transpose_x = var_12644_transpose_x_0, transpose_y = var_12644_transpose_y_0, x = query_states_171, y = key_states_217_cast_fp16)[name = string("op_12644")]; fp16 var_12645_to_fp16 = const()[name = string("op_12645_to_fp16"), val = fp16(0x1.6ap-4)]; tensor attn_weights_85_cast_fp16 = mul(x = var_12644, y = var_12645_to_fp16)[name = string("attn_weights_85_cast_fp16")]; tensor attn_weights_87_cast_fp16 = add(x = attn_weights_85_cast_fp16, y = causal_mask)[name = string("attn_weights_87_cast_fp16")]; int32 var_12680 = const()[name = string("op_12680"), val = int32(-1)]; tensor var_12682_cast_fp16 = softmax(axis = var_12680, x = attn_weights_87_cast_fp16)[name = string("op_12682_cast_fp16")]; tensor concat_390 = const()[name = string("concat_390"), val = tensor([16, 64, 1024])]; tensor reshape_63_cast_fp16 = reshape(shape = concat_390, x = var_12682_cast_fp16)[name = string("reshape_63_cast_fp16")]; tensor concat_391 = const()[name = string("concat_391"), val = tensor([16, 1024, 128])]; tensor reshape_64_cast_fp16 = reshape(shape = concat_391, x = x_435_cast_fp16)[name = string("reshape_64_cast_fp16")]; bool matmul_21_transpose_x_0 = const()[name = string("matmul_21_transpose_x_0"), val = bool(false)]; bool matmul_21_transpose_y_0 = const()[name = string("matmul_21_transpose_y_0"), val = bool(false)]; tensor matmul_21_cast_fp16 = matmul(transpose_x = matmul_21_transpose_x_0, transpose_y = matmul_21_transpose_y_0, x = reshape_63_cast_fp16, y = reshape_64_cast_fp16)[name = string("matmul_21_cast_fp16")]; tensor concat_395 = const()[name = string("concat_395"), val = tensor([1, 16, 64, 128])]; tensor reshape_65_cast_fp16 = reshape(shape = concat_395, x = matmul_21_cast_fp16)[name = string("reshape_65_cast_fp16")]; tensor var_12694_perm_0 = const()[name = string("op_12694_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_12713 = const()[name = string("op_12713"), val = tensor([1, 64, 2048])]; tensor var_12694_cast_fp16 = transpose(perm = var_12694_perm_0, x = reshape_65_cast_fp16)[name = string("transpose_58")]; tensor attn_output_215_cast_fp16 = reshape(shape = var_12713, x = var_12694_cast_fp16)[name = string("attn_output_215_cast_fp16")]; tensor var_12718 = const()[name = string("op_12718"), val = tensor([0, 2, 1])]; string var_12734_pad_type_0 = const()[name = string("op_12734_pad_type_0"), val = string("valid")]; int32 var_12734_groups_0 = const()[name = string("op_12734_groups_0"), val = int32(1)]; tensor var_12734_strides_0 = const()[name = string("op_12734_strides_0"), val = tensor([1])]; tensor var_12734_pad_0 = const()[name = string("op_12734_pad_0"), val = tensor([0, 0])]; tensor var_12734_dilations_0 = const()[name = string("op_12734_dilations_0"), val = tensor([1])]; tensor squeeze_21_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1077661952))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1080807744))))[name = string("squeeze_21_cast_fp16_to_fp32_to_fp16_palettized")]; tensor var_12719_cast_fp16 = transpose(perm = var_12718, x = attn_output_215_cast_fp16)[name = string("transpose_57")]; tensor var_12734_cast_fp16 = conv(dilations = var_12734_dilations_0, groups = var_12734_groups_0, pad = var_12734_pad_0, pad_type = var_12734_pad_type_0, strides = var_12734_strides_0, weight = squeeze_21_cast_fp16_to_fp32_to_fp16_palettized, x = var_12719_cast_fp16)[name = string("op_12734_cast_fp16")]; tensor var_12738 = const()[name = string("op_12738"), val = tensor([0, 2, 1])]; tensor attn_output_219_cast_fp16 = transpose(perm = var_12738, x = var_12734_cast_fp16)[name = string("transpose_56")]; tensor hidden_states_131_cast_fp16 = add(x = hidden_states_127_cast_fp16, y = attn_output_219_cast_fp16)[name = string("hidden_states_131_cast_fp16")]; tensor mean_175_axes_0 = const()[name = string("mean_175_axes_0"), val = tensor([-1])]; bool mean_175_keep_dims_0 = const()[name = string("mean_175_keep_dims_0"), val = bool(true)]; tensor mean_175_cast_fp16 = reduce_mean(axes = mean_175_axes_0, keep_dims = mean_175_keep_dims_0, x = hidden_states_131_cast_fp16)[name = string("mean_175_cast_fp16")]; tensor input_389_cast_fp16 = sub(x = hidden_states_131_cast_fp16, y = mean_175_cast_fp16)[name = string("input_389_cast_fp16")]; tensor var_12757_axes_0 = const()[name = string("op_12757_axes_0"), val = tensor([-1])]; tensor model_model_layers_21_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_21_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1080840576)))]; fp16 var_12745_to_fp16 = const()[name = string("op_12745_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_12757_cast_fp16 = layer_norm(axes = var_12757_axes_0, epsilon = var_12745_to_fp16, gamma = model_model_layers_21_post_attention_layernorm_weight_to_fp16, x = input_389_cast_fp16)[name = string("op_12757_cast_fp16")]; tensor var_12771 = const()[name = string("op_12771"), val = tensor([0, 2, 1])]; tensor input_391_axes_0 = const()[name = string("input_391_axes_0"), val = tensor([2])]; tensor var_12772 = transpose(perm = var_12771, x = var_12757_cast_fp16)[name = string("transpose_55")]; tensor input_391 = expand_dims(axes = input_391_axes_0, x = var_12772)[name = string("input_391")]; string input_393_pad_type_0 = const()[name = string("input_393_pad_type_0"), val = string("valid")]; tensor input_393_strides_0 = const()[name = string("input_393_strides_0"), val = tensor([1, 1])]; tensor input_393_pad_0 = const()[name = string("input_393_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_393_dilations_0 = const()[name = string("input_393_dilations_0"), val = tensor([1, 1])]; int32 input_393_groups_0 = const()[name = string("input_393_groups_0"), val = int32(1)]; tensor input_393 = conv(dilations = input_393_dilations_0, groups = input_393_groups_0, pad = input_393_pad_0, pad_type = input_393_pad_type_0, strides = input_393_strides_0, weight = model_model_layers_21_mlp_gate_proj_weight_palettized, x = input_391)[name = string("input_393")]; string b_43_pad_type_0 = const()[name = string("b_43_pad_type_0"), val = string("valid")]; tensor b_43_strides_0 = const()[name = string("b_43_strides_0"), val = tensor([1, 1])]; tensor b_43_pad_0 = const()[name = string("b_43_pad_0"), val = tensor([0, 0, 0, 0])]; tensor b_43_dilations_0 = const()[name = string("b_43_dilations_0"), val = tensor([1, 1])]; int32 b_43_groups_0 = const()[name = string("b_43_groups_0"), val = int32(1)]; tensor b_43 = conv(dilations = b_43_dilations_0, groups = b_43_groups_0, pad = b_43_pad_0, pad_type = b_43_pad_type_0, strides = b_43_strides_0, weight = model_model_layers_21_mlp_up_proj_weight_palettized, x = input_391)[name = string("b_43")]; tensor c_43 = silu(x = input_393)[name = string("c_43")]; tensor input_395 = mul(x = c_43, y = b_43)[name = string("input_395")]; string e_43_pad_type_0 = const()[name = string("e_43_pad_type_0"), val = string("valid")]; tensor e_43_strides_0 = const()[name = string("e_43_strides_0"), val = tensor([1, 1])]; tensor e_43_pad_0 = const()[name = string("e_43_pad_0"), val = tensor([0, 0, 0, 0])]; tensor e_43_dilations_0 = const()[name = string("e_43_dilations_0"), val = tensor([1, 1])]; int32 e_43_groups_0 = const()[name = string("e_43_groups_0"), val = int32(1)]; tensor e_43 = conv(dilations = e_43_dilations_0, groups = e_43_groups_0, pad = e_43_pad_0, pad_type = e_43_pad_type_0, strides = e_43_strides_0, weight = model_model_layers_21_mlp_down_proj_weight_palettized, x = input_395)[name = string("e_43")]; tensor var_12794_axes_0 = const()[name = string("op_12794_axes_0"), val = tensor([2])]; tensor var_12794 = squeeze(axes = var_12794_axes_0, x = e_43)[name = string("op_12794")]; tensor var_12795 = const()[name = string("op_12795"), val = tensor([0, 2, 1])]; tensor var_12796 = transpose(perm = var_12795, x = var_12794)[name = string("transpose_54")]; tensor hidden_states_133_cast_fp16 = add(x = hidden_states_131_cast_fp16, y = var_12796)[name = string("hidden_states_133_cast_fp16")]; tensor mean_177_axes_0 = const()[name = string("mean_177_axes_0"), val = tensor([-1])]; bool mean_177_keep_dims_0 = const()[name = string("mean_177_keep_dims_0"), val = bool(true)]; tensor mean_177_cast_fp16 = reduce_mean(axes = mean_177_axes_0, keep_dims = mean_177_keep_dims_0, x = hidden_states_133_cast_fp16)[name = string("mean_177_cast_fp16")]; tensor input_397_cast_fp16 = sub(x = hidden_states_133_cast_fp16, y = mean_177_cast_fp16)[name = string("input_397_cast_fp16")]; tensor var_12814_axes_0 = const()[name = string("op_12814_axes_0"), val = tensor([-1])]; tensor model_model_layers_22_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_22_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1080844736)))]; fp16 var_12802_to_fp16 = const()[name = string("op_12802_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_12814_cast_fp16 = layer_norm(axes = var_12814_axes_0, epsilon = var_12802_to_fp16, gamma = model_model_layers_22_input_layernorm_weight_to_fp16, x = input_397_cast_fp16)[name = string("op_12814_cast_fp16")]; tensor var_12826 = const()[name = string("op_12826"), val = tensor([0, 2, 1])]; tensor var_12829_axes_0 = const()[name = string("op_12829_axes_0"), val = tensor([2])]; tensor var_12827 = transpose(perm = var_12826, x = var_12814_cast_fp16)[name = string("transpose_53")]; tensor var_12829 = expand_dims(axes = var_12829_axes_0, x = var_12827)[name = string("op_12829")]; string query_states_177_pad_type_0 = const()[name = string("query_states_177_pad_type_0"), val = string("valid")]; tensor query_states_177_strides_0 = const()[name = string("query_states_177_strides_0"), val = tensor([1, 1])]; tensor query_states_177_pad_0 = const()[name = string("query_states_177_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_177_dilations_0 = const()[name = string("query_states_177_dilations_0"), val = tensor([1, 1])]; int32 query_states_177_groups_0 = const()[name = string("query_states_177_groups_0"), val = int32(1)]; tensor query_states_177 = conv(dilations = query_states_177_dilations_0, groups = query_states_177_groups_0, pad = query_states_177_pad_0, pad_type = query_states_177_pad_type_0, strides = query_states_177_strides_0, weight = model_model_layers_22_self_attn_q_proj_weight_palettized, x = var_12829)[name = string("query_states_177")]; string key_states_221_pad_type_0 = const()[name = string("key_states_221_pad_type_0"), val = string("valid")]; tensor key_states_221_strides_0 = const()[name = string("key_states_221_strides_0"), val = tensor([1, 1])]; tensor key_states_221_pad_0 = const()[name = string("key_states_221_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_221_dilations_0 = const()[name = string("key_states_221_dilations_0"), val = tensor([1, 1])]; int32 key_states_221_groups_0 = const()[name = string("key_states_221_groups_0"), val = int32(1)]; tensor key_states_221 = conv(dilations = key_states_221_dilations_0, groups = key_states_221_groups_0, pad = key_states_221_pad_0, pad_type = key_states_221_pad_type_0, strides = key_states_221_strides_0, weight = model_model_layers_22_self_attn_k_proj_weight_palettized, x = var_12829)[name = string("key_states_221")]; string value_states_177_pad_type_0 = const()[name = string("value_states_177_pad_type_0"), val = string("valid")]; tensor value_states_177_strides_0 = const()[name = string("value_states_177_strides_0"), val = tensor([1, 1])]; tensor value_states_177_pad_0 = const()[name = string("value_states_177_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_177_dilations_0 = const()[name = string("value_states_177_dilations_0"), val = tensor([1, 1])]; int32 value_states_177_groups_0 = const()[name = string("value_states_177_groups_0"), val = int32(1)]; tensor value_states_177 = conv(dilations = value_states_177_dilations_0, groups = value_states_177_groups_0, pad = value_states_177_pad_0, pad_type = value_states_177_pad_type_0, strides = value_states_177_strides_0, weight = model_model_layers_22_self_attn_v_proj_weight_palettized, x = var_12829)[name = string("value_states_177")]; tensor var_12871 = const()[name = string("op_12871"), val = tensor([1, 16, 128, 64])]; tensor var_12872 = reshape(shape = var_12871, x = query_states_177)[name = string("op_12872")]; tensor var_12877 = const()[name = string("op_12877"), val = tensor([0, 1, 3, 2])]; tensor var_12882 = const()[name = string("op_12882"), val = tensor([1, 8, 128, 64])]; tensor var_12883 = reshape(shape = var_12882, x = key_states_221)[name = string("op_12883")]; tensor var_12888 = const()[name = string("op_12888"), val = tensor([0, 1, 3, 2])]; tensor var_12893 = const()[name = string("op_12893"), val = tensor([1, 8, 128, 64])]; tensor var_12894 = reshape(shape = var_12893, x = value_states_177)[name = string("op_12894")]; tensor var_12899 = const()[name = string("op_12899"), val = tensor([0, 1, 3, 2])]; tensor mean_179_axes_0 = const()[name = string("mean_179_axes_0"), val = tensor([-1])]; bool mean_179_keep_dims_0 = const()[name = string("mean_179_keep_dims_0"), val = bool(true)]; tensor x_441 = transpose(perm = var_12877, x = var_12872)[name = string("transpose_52")]; tensor mean_179 = reduce_mean(axes = mean_179_axes_0, keep_dims = mean_179_keep_dims_0, x = x_441)[name = string("mean_179")]; tensor input_401 = sub(x = x_441, y = mean_179)[name = string("input_401")]; tensor var_12916_axes_0 = const()[name = string("op_12916_axes_0"), val = tensor([-1])]; tensor model_model_layers_22_self_attn_q_norm_weight_to_fp16 = const()[name = string("model_model_layers_22_self_attn_q_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1080848896)))]; fp16 var_12904_to_fp16 = const()[name = string("op_12904_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_12916_cast_fp16 = layer_norm(axes = var_12916_axes_0, epsilon = var_12904_to_fp16, gamma = model_model_layers_22_self_attn_q_norm_weight_to_fp16, x = input_401)[name = string("op_12916_cast_fp16")]; tensor mean_181_axes_0 = const()[name = string("mean_181_axes_0"), val = tensor([-1])]; bool mean_181_keep_dims_0 = const()[name = string("mean_181_keep_dims_0"), val = bool(true)]; tensor x_443 = transpose(perm = var_12888, x = var_12883)[name = string("transpose_51")]; tensor mean_181 = reduce_mean(axes = mean_181_axes_0, keep_dims = mean_181_keep_dims_0, x = x_443)[name = string("mean_181")]; tensor input_403 = sub(x = x_443, y = mean_181)[name = string("input_403")]; tensor var_12934_axes_0 = const()[name = string("op_12934_axes_0"), val = tensor([-1])]; tensor model_model_layers_22_self_attn_k_norm_weight_to_fp16 = const()[name = string("model_model_layers_22_self_attn_k_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1080849216)))]; fp16 var_12922_to_fp16 = const()[name = string("op_12922_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_12934_cast_fp16 = layer_norm(axes = var_12934_axes_0, epsilon = var_12922_to_fp16, gamma = model_model_layers_22_self_attn_k_norm_weight_to_fp16, x = input_403)[name = string("op_12934_cast_fp16")]; tensor var_12949 = mul(x = var_12916_cast_fp16, y = cos_5)[name = string("op_12949")]; tensor x1_89_begin_0 = const()[name = string("x1_89_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_89_end_0 = const()[name = string("x1_89_end_0"), val = tensor([1, 16, 64, 64])]; tensor x1_89_end_mask_0 = const()[name = string("x1_89_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_89 = slice_by_index(begin = x1_89_begin_0, end = x1_89_end_0, end_mask = x1_89_end_mask_0, x = var_12916_cast_fp16)[name = string("x1_89")]; tensor x2_89_begin_0 = const()[name = string("x2_89_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_89_end_0 = const()[name = string("x2_89_end_0"), val = tensor([1, 16, 64, 128])]; tensor x2_89_end_mask_0 = const()[name = string("x2_89_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_89 = slice_by_index(begin = x2_89_begin_0, end = x2_89_end_0, end_mask = x2_89_end_mask_0, x = var_12916_cast_fp16)[name = string("x2_89")]; fp16 const_491_promoted = const()[name = string("const_491_promoted"), val = fp16(-0x1p+0)]; tensor var_12970 = mul(x = x2_89, y = const_491_promoted)[name = string("op_12970")]; int32 var_12972 = const()[name = string("op_12972"), val = int32(-1)]; bool var_12973_interleave_0 = const()[name = string("op_12973_interleave_0"), val = bool(false)]; tensor var_12973 = concat(axis = var_12972, interleave = var_12973_interleave_0, values = (var_12970, x1_89))[name = string("op_12973")]; tensor var_12974 = mul(x = var_12973, y = sin_5)[name = string("op_12974")]; tensor query_states_179 = add(x = var_12949, y = var_12974)[name = string("query_states_179")]; tensor var_12977 = mul(x = var_12934_cast_fp16, y = cos_5)[name = string("op_12977")]; tensor x1_91_begin_0 = const()[name = string("x1_91_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_91_end_0 = const()[name = string("x1_91_end_0"), val = tensor([1, 8, 64, 64])]; tensor x1_91_end_mask_0 = const()[name = string("x1_91_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_91 = slice_by_index(begin = x1_91_begin_0, end = x1_91_end_0, end_mask = x1_91_end_mask_0, x = var_12934_cast_fp16)[name = string("x1_91")]; tensor x2_91_begin_0 = const()[name = string("x2_91_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_91_end_0 = const()[name = string("x2_91_end_0"), val = tensor([1, 8, 64, 128])]; tensor x2_91_end_mask_0 = const()[name = string("x2_91_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_91 = slice_by_index(begin = x2_91_begin_0, end = x2_91_end_0, end_mask = x2_91_end_mask_0, x = var_12934_cast_fp16)[name = string("x2_91")]; fp16 const_494_promoted = const()[name = string("const_494_promoted"), val = fp16(-0x1p+0)]; tensor var_12998 = mul(x = x2_91, y = const_494_promoted)[name = string("op_12998")]; int32 var_13000 = const()[name = string("op_13000"), val = int32(-1)]; bool var_13001_interleave_0 = const()[name = string("op_13001_interleave_0"), val = bool(false)]; tensor var_13001 = concat(axis = var_13000, interleave = var_13001_interleave_0, values = (var_12998, x1_91))[name = string("op_13001")]; tensor var_13002 = mul(x = var_13001, y = sin_5)[name = string("op_13002")]; tensor key_states_223 = add(x = var_12977, y = var_13002)[name = string("key_states_223")]; tensor expand_dims_264 = const()[name = string("expand_dims_264"), val = tensor([22])]; tensor expand_dims_265 = const()[name = string("expand_dims_265"), val = tensor([0])]; tensor expand_dims_267 = const()[name = string("expand_dims_267"), val = tensor([0])]; tensor expand_dims_268 = const()[name = string("expand_dims_268"), val = tensor([23])]; int32 concat_398_axis_0 = const()[name = string("concat_398_axis_0"), val = int32(0)]; bool concat_398_interleave_0 = const()[name = string("concat_398_interleave_0"), val = bool(false)]; tensor concat_398 = concat(axis = concat_398_axis_0, interleave = concat_398_interleave_0, values = (expand_dims_264, expand_dims_265, current_pos, expand_dims_267))[name = string("concat_398")]; tensor concat_399_values1_0 = const()[name = string("concat_399_values1_0"), val = tensor([0])]; tensor concat_399_values3_0 = const()[name = string("concat_399_values3_0"), val = tensor([0])]; int32 concat_399_axis_0 = const()[name = string("concat_399_axis_0"), val = int32(0)]; bool concat_399_interleave_0 = const()[name = string("concat_399_interleave_0"), val = bool(false)]; tensor concat_399 = concat(axis = concat_399_axis_0, interleave = concat_399_interleave_0, values = (expand_dims_268, concat_399_values1_0, var_1760, concat_399_values3_0))[name = string("concat_399")]; tensor model_model_kv_cache_0_internal_tensor_assign_45_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_45_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_45_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_45_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_45_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_45_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_45_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_45_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_45_cast_fp16 = slice_update(begin = concat_398, begin_mask = model_model_kv_cache_0_internal_tensor_assign_45_begin_mask_0, end = concat_399, end_mask = model_model_kv_cache_0_internal_tensor_assign_45_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_45_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_45_stride_0, update = key_states_223, x = coreml_update_state_99)[name = string("model_model_kv_cache_0_internal_tensor_assign_45_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_45_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_100_write_state")]; tensor coreml_update_state_100 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_100")]; tensor expand_dims_270 = const()[name = string("expand_dims_270"), val = tensor([50])]; tensor expand_dims_271 = const()[name = string("expand_dims_271"), val = tensor([0])]; tensor expand_dims_273 = const()[name = string("expand_dims_273"), val = tensor([0])]; tensor expand_dims_274 = const()[name = string("expand_dims_274"), val = tensor([51])]; int32 concat_402_axis_0 = const()[name = string("concat_402_axis_0"), val = int32(0)]; bool concat_402_interleave_0 = const()[name = string("concat_402_interleave_0"), val = bool(false)]; tensor concat_402 = concat(axis = concat_402_axis_0, interleave = concat_402_interleave_0, values = (expand_dims_270, expand_dims_271, current_pos, expand_dims_273))[name = string("concat_402")]; tensor concat_403_values1_0 = const()[name = string("concat_403_values1_0"), val = tensor([0])]; tensor concat_403_values3_0 = const()[name = string("concat_403_values3_0"), val = tensor([0])]; int32 concat_403_axis_0 = const()[name = string("concat_403_axis_0"), val = int32(0)]; bool concat_403_interleave_0 = const()[name = string("concat_403_interleave_0"), val = bool(false)]; tensor concat_403 = concat(axis = concat_403_axis_0, interleave = concat_403_interleave_0, values = (expand_dims_274, concat_403_values1_0, var_1760, concat_403_values3_0))[name = string("concat_403")]; tensor model_model_kv_cache_0_internal_tensor_assign_46_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_46_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_46_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_46_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_46_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_46_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_46_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_46_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_179 = transpose(perm = var_12899, x = var_12894)[name = string("transpose_50")]; tensor model_model_kv_cache_0_internal_tensor_assign_46_cast_fp16 = slice_update(begin = concat_402, begin_mask = model_model_kv_cache_0_internal_tensor_assign_46_begin_mask_0, end = concat_403, end_mask = model_model_kv_cache_0_internal_tensor_assign_46_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_46_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_46_stride_0, update = value_states_179, x = coreml_update_state_100)[name = string("model_model_kv_cache_0_internal_tensor_assign_46_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_46_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_101_write_state")]; tensor coreml_update_state_101 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_101")]; tensor var_13073_begin_0 = const()[name = string("op_13073_begin_0"), val = tensor([22, 0, 0, 0])]; tensor var_13073_end_0 = const()[name = string("op_13073_end_0"), val = tensor([23, 8, 1024, 128])]; tensor var_13073_end_mask_0 = const()[name = string("op_13073_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_13073_cast_fp16 = slice_by_index(begin = var_13073_begin_0, end = var_13073_end_0, end_mask = var_13073_end_mask_0, x = coreml_update_state_101)[name = string("op_13073_cast_fp16")]; tensor K_layer_cache_45_axes_0 = const()[name = string("K_layer_cache_45_axes_0"), val = tensor([0])]; tensor K_layer_cache_45_cast_fp16 = squeeze(axes = K_layer_cache_45_axes_0, x = var_13073_cast_fp16)[name = string("K_layer_cache_45_cast_fp16")]; tensor var_13080_begin_0 = const()[name = string("op_13080_begin_0"), val = tensor([50, 0, 0, 0])]; tensor var_13080_end_0 = const()[name = string("op_13080_end_0"), val = tensor([51, 8, 1024, 128])]; tensor var_13080_end_mask_0 = const()[name = string("op_13080_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_13080_cast_fp16 = slice_by_index(begin = var_13080_begin_0, end = var_13080_end_0, end_mask = var_13080_end_mask_0, x = coreml_update_state_101)[name = string("op_13080_cast_fp16")]; tensor V_layer_cache_45_axes_0 = const()[name = string("V_layer_cache_45_axes_0"), val = tensor([0])]; tensor V_layer_cache_45_cast_fp16 = squeeze(axes = V_layer_cache_45_axes_0, x = var_13080_cast_fp16)[name = string("V_layer_cache_45_cast_fp16")]; tensor x_447_axes_0 = const()[name = string("x_447_axes_0"), val = tensor([1])]; tensor x_447_cast_fp16 = expand_dims(axes = x_447_axes_0, x = K_layer_cache_45_cast_fp16)[name = string("x_447_cast_fp16")]; tensor var_13109 = const()[name = string("op_13109"), val = tensor([1, 2, 1, 1])]; tensor x_449_cast_fp16 = tile(reps = var_13109, x = x_447_cast_fp16)[name = string("x_449_cast_fp16")]; tensor var_13121 = const()[name = string("op_13121"), val = tensor([1, -1, 1024, 128])]; tensor key_states_227_cast_fp16 = reshape(shape = var_13121, x = x_449_cast_fp16)[name = string("key_states_227_cast_fp16")]; tensor x_453_axes_0 = const()[name = string("x_453_axes_0"), val = tensor([1])]; tensor x_453_cast_fp16 = expand_dims(axes = x_453_axes_0, x = V_layer_cache_45_cast_fp16)[name = string("x_453_cast_fp16")]; tensor var_13129 = const()[name = string("op_13129"), val = tensor([1, 2, 1, 1])]; tensor x_455_cast_fp16 = tile(reps = var_13129, x = x_453_cast_fp16)[name = string("x_455_cast_fp16")]; bool var_13156_transpose_x_0 = const()[name = string("op_13156_transpose_x_0"), val = bool(false)]; bool var_13156_transpose_y_0 = const()[name = string("op_13156_transpose_y_0"), val = bool(true)]; tensor var_13156 = matmul(transpose_x = var_13156_transpose_x_0, transpose_y = var_13156_transpose_y_0, x = query_states_179, y = key_states_227_cast_fp16)[name = string("op_13156")]; fp16 var_13157_to_fp16 = const()[name = string("op_13157_to_fp16"), val = fp16(0x1.6ap-4)]; tensor attn_weights_89_cast_fp16 = mul(x = var_13156, y = var_13157_to_fp16)[name = string("attn_weights_89_cast_fp16")]; tensor attn_weights_91_cast_fp16 = add(x = attn_weights_89_cast_fp16, y = causal_mask)[name = string("attn_weights_91_cast_fp16")]; int32 var_13192 = const()[name = string("op_13192"), val = int32(-1)]; tensor var_13194_cast_fp16 = softmax(axis = var_13192, x = attn_weights_91_cast_fp16)[name = string("op_13194_cast_fp16")]; tensor concat_408 = const()[name = string("concat_408"), val = tensor([16, 64, 1024])]; tensor reshape_66_cast_fp16 = reshape(shape = concat_408, x = var_13194_cast_fp16)[name = string("reshape_66_cast_fp16")]; tensor concat_409 = const()[name = string("concat_409"), val = tensor([16, 1024, 128])]; tensor reshape_67_cast_fp16 = reshape(shape = concat_409, x = x_455_cast_fp16)[name = string("reshape_67_cast_fp16")]; bool matmul_22_transpose_x_0 = const()[name = string("matmul_22_transpose_x_0"), val = bool(false)]; bool matmul_22_transpose_y_0 = const()[name = string("matmul_22_transpose_y_0"), val = bool(false)]; tensor matmul_22_cast_fp16 = matmul(transpose_x = matmul_22_transpose_x_0, transpose_y = matmul_22_transpose_y_0, x = reshape_66_cast_fp16, y = reshape_67_cast_fp16)[name = string("matmul_22_cast_fp16")]; tensor concat_413 = const()[name = string("concat_413"), val = tensor([1, 16, 64, 128])]; tensor reshape_68_cast_fp16 = reshape(shape = concat_413, x = matmul_22_cast_fp16)[name = string("reshape_68_cast_fp16")]; tensor var_13206_perm_0 = const()[name = string("op_13206_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_13225 = const()[name = string("op_13225"), val = tensor([1, 64, 2048])]; tensor var_13206_cast_fp16 = transpose(perm = var_13206_perm_0, x = reshape_68_cast_fp16)[name = string("transpose_49")]; tensor attn_output_225_cast_fp16 = reshape(shape = var_13225, x = var_13206_cast_fp16)[name = string("attn_output_225_cast_fp16")]; tensor var_13230 = const()[name = string("op_13230"), val = tensor([0, 2, 1])]; string var_13246_pad_type_0 = const()[name = string("op_13246_pad_type_0"), val = string("valid")]; int32 var_13246_groups_0 = const()[name = string("op_13246_groups_0"), val = int32(1)]; tensor var_13246_strides_0 = const()[name = string("op_13246_strides_0"), val = tensor([1])]; tensor var_13246_pad_0 = const()[name = string("op_13246_pad_0"), val = tensor([0, 0])]; tensor var_13246_dilations_0 = const()[name = string("op_13246_dilations_0"), val = tensor([1])]; tensor squeeze_22_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1080849536))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1083995328))))[name = string("squeeze_22_cast_fp16_to_fp32_to_fp16_palettized")]; tensor var_13231_cast_fp16 = transpose(perm = var_13230, x = attn_output_225_cast_fp16)[name = string("transpose_48")]; tensor var_13246_cast_fp16 = conv(dilations = var_13246_dilations_0, groups = var_13246_groups_0, pad = var_13246_pad_0, pad_type = var_13246_pad_type_0, strides = var_13246_strides_0, weight = squeeze_22_cast_fp16_to_fp32_to_fp16_palettized, x = var_13231_cast_fp16)[name = string("op_13246_cast_fp16")]; tensor var_13250 = const()[name = string("op_13250"), val = tensor([0, 2, 1])]; tensor attn_output_229_cast_fp16 = transpose(perm = var_13250, x = var_13246_cast_fp16)[name = string("transpose_47")]; tensor hidden_states_137_cast_fp16 = add(x = hidden_states_133_cast_fp16, y = attn_output_229_cast_fp16)[name = string("hidden_states_137_cast_fp16")]; tensor mean_183_axes_0 = const()[name = string("mean_183_axes_0"), val = tensor([-1])]; bool mean_183_keep_dims_0 = const()[name = string("mean_183_keep_dims_0"), val = bool(true)]; tensor mean_183_cast_fp16 = reduce_mean(axes = mean_183_axes_0, keep_dims = mean_183_keep_dims_0, x = hidden_states_137_cast_fp16)[name = string("mean_183_cast_fp16")]; tensor input_407_cast_fp16 = sub(x = hidden_states_137_cast_fp16, y = mean_183_cast_fp16)[name = string("input_407_cast_fp16")]; tensor var_13269_axes_0 = const()[name = string("op_13269_axes_0"), val = tensor([-1])]; tensor model_model_layers_22_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_22_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1084028160)))]; fp16 var_13257_to_fp16 = const()[name = string("op_13257_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_13269_cast_fp16 = layer_norm(axes = var_13269_axes_0, epsilon = var_13257_to_fp16, gamma = model_model_layers_22_post_attention_layernorm_weight_to_fp16, x = input_407_cast_fp16)[name = string("op_13269_cast_fp16")]; tensor var_13283 = const()[name = string("op_13283"), val = tensor([0, 2, 1])]; tensor input_409_axes_0 = const()[name = string("input_409_axes_0"), val = tensor([2])]; tensor var_13284 = transpose(perm = var_13283, x = var_13269_cast_fp16)[name = string("transpose_46")]; tensor input_409 = expand_dims(axes = input_409_axes_0, x = var_13284)[name = string("input_409")]; string input_411_pad_type_0 = const()[name = string("input_411_pad_type_0"), val = string("valid")]; tensor input_411_strides_0 = const()[name = string("input_411_strides_0"), val = tensor([1, 1])]; tensor input_411_pad_0 = const()[name = string("input_411_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_411_dilations_0 = const()[name = string("input_411_dilations_0"), val = tensor([1, 1])]; int32 input_411_groups_0 = const()[name = string("input_411_groups_0"), val = int32(1)]; tensor input_411 = conv(dilations = input_411_dilations_0, groups = input_411_groups_0, pad = input_411_pad_0, pad_type = input_411_pad_type_0, strides = input_411_strides_0, weight = model_model_layers_22_mlp_gate_proj_weight_palettized, x = input_409)[name = string("input_411")]; string b_45_pad_type_0 = const()[name = string("b_45_pad_type_0"), val = string("valid")]; tensor b_45_strides_0 = const()[name = string("b_45_strides_0"), val = tensor([1, 1])]; tensor b_45_pad_0 = const()[name = string("b_45_pad_0"), val = tensor([0, 0, 0, 0])]; tensor b_45_dilations_0 = const()[name = string("b_45_dilations_0"), val = tensor([1, 1])]; int32 b_45_groups_0 = const()[name = string("b_45_groups_0"), val = int32(1)]; tensor b_45 = conv(dilations = b_45_dilations_0, groups = b_45_groups_0, pad = b_45_pad_0, pad_type = b_45_pad_type_0, strides = b_45_strides_0, weight = model_model_layers_22_mlp_up_proj_weight_palettized, x = input_409)[name = string("b_45")]; tensor c_45 = silu(x = input_411)[name = string("c_45")]; tensor input_413 = mul(x = c_45, y = b_45)[name = string("input_413")]; string e_45_pad_type_0 = const()[name = string("e_45_pad_type_0"), val = string("valid")]; tensor e_45_strides_0 = const()[name = string("e_45_strides_0"), val = tensor([1, 1])]; tensor e_45_pad_0 = const()[name = string("e_45_pad_0"), val = tensor([0, 0, 0, 0])]; tensor e_45_dilations_0 = const()[name = string("e_45_dilations_0"), val = tensor([1, 1])]; int32 e_45_groups_0 = const()[name = string("e_45_groups_0"), val = int32(1)]; tensor e_45 = conv(dilations = e_45_dilations_0, groups = e_45_groups_0, pad = e_45_pad_0, pad_type = e_45_pad_type_0, strides = e_45_strides_0, weight = model_model_layers_22_mlp_down_proj_weight_palettized, x = input_413)[name = string("e_45")]; tensor var_13306_axes_0 = const()[name = string("op_13306_axes_0"), val = tensor([2])]; tensor var_13306 = squeeze(axes = var_13306_axes_0, x = e_45)[name = string("op_13306")]; tensor var_13307 = const()[name = string("op_13307"), val = tensor([0, 2, 1])]; tensor var_13308 = transpose(perm = var_13307, x = var_13306)[name = string("transpose_45")]; tensor hidden_states_139_cast_fp16 = add(x = hidden_states_137_cast_fp16, y = var_13308)[name = string("hidden_states_139_cast_fp16")]; tensor mean_185_axes_0 = const()[name = string("mean_185_axes_0"), val = tensor([-1])]; bool mean_185_keep_dims_0 = const()[name = string("mean_185_keep_dims_0"), val = bool(true)]; tensor mean_185_cast_fp16 = reduce_mean(axes = mean_185_axes_0, keep_dims = mean_185_keep_dims_0, x = hidden_states_139_cast_fp16)[name = string("mean_185_cast_fp16")]; tensor input_415_cast_fp16 = sub(x = hidden_states_139_cast_fp16, y = mean_185_cast_fp16)[name = string("input_415_cast_fp16")]; tensor var_13326_axes_0 = const()[name = string("op_13326_axes_0"), val = tensor([-1])]; tensor model_model_layers_23_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_23_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1084032320)))]; fp16 var_13314_to_fp16 = const()[name = string("op_13314_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_13326_cast_fp16 = layer_norm(axes = var_13326_axes_0, epsilon = var_13314_to_fp16, gamma = model_model_layers_23_input_layernorm_weight_to_fp16, x = input_415_cast_fp16)[name = string("op_13326_cast_fp16")]; tensor var_13338 = const()[name = string("op_13338"), val = tensor([0, 2, 1])]; tensor var_13341_axes_0 = const()[name = string("op_13341_axes_0"), val = tensor([2])]; tensor var_13339 = transpose(perm = var_13338, x = var_13326_cast_fp16)[name = string("transpose_44")]; tensor var_13341 = expand_dims(axes = var_13341_axes_0, x = var_13339)[name = string("op_13341")]; string query_states_185_pad_type_0 = const()[name = string("query_states_185_pad_type_0"), val = string("valid")]; tensor query_states_185_strides_0 = const()[name = string("query_states_185_strides_0"), val = tensor([1, 1])]; tensor query_states_185_pad_0 = const()[name = string("query_states_185_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_185_dilations_0 = const()[name = string("query_states_185_dilations_0"), val = tensor([1, 1])]; int32 query_states_185_groups_0 = const()[name = string("query_states_185_groups_0"), val = int32(1)]; tensor query_states_185 = conv(dilations = query_states_185_dilations_0, groups = query_states_185_groups_0, pad = query_states_185_pad_0, pad_type = query_states_185_pad_type_0, strides = query_states_185_strides_0, weight = model_model_layers_23_self_attn_q_proj_weight_palettized, x = var_13341)[name = string("query_states_185")]; string key_states_231_pad_type_0 = const()[name = string("key_states_231_pad_type_0"), val = string("valid")]; tensor key_states_231_strides_0 = const()[name = string("key_states_231_strides_0"), val = tensor([1, 1])]; tensor key_states_231_pad_0 = const()[name = string("key_states_231_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_231_dilations_0 = const()[name = string("key_states_231_dilations_0"), val = tensor([1, 1])]; int32 key_states_231_groups_0 = const()[name = string("key_states_231_groups_0"), val = int32(1)]; tensor key_states_231 = conv(dilations = key_states_231_dilations_0, groups = key_states_231_groups_0, pad = key_states_231_pad_0, pad_type = key_states_231_pad_type_0, strides = key_states_231_strides_0, weight = model_model_layers_23_self_attn_k_proj_weight_palettized, x = var_13341)[name = string("key_states_231")]; string value_states_185_pad_type_0 = const()[name = string("value_states_185_pad_type_0"), val = string("valid")]; tensor value_states_185_strides_0 = const()[name = string("value_states_185_strides_0"), val = tensor([1, 1])]; tensor value_states_185_pad_0 = const()[name = string("value_states_185_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_185_dilations_0 = const()[name = string("value_states_185_dilations_0"), val = tensor([1, 1])]; int32 value_states_185_groups_0 = const()[name = string("value_states_185_groups_0"), val = int32(1)]; tensor value_states_185 = conv(dilations = value_states_185_dilations_0, groups = value_states_185_groups_0, pad = value_states_185_pad_0, pad_type = value_states_185_pad_type_0, strides = value_states_185_strides_0, weight = model_model_layers_23_self_attn_v_proj_weight_palettized, x = var_13341)[name = string("value_states_185")]; tensor var_13383 = const()[name = string("op_13383"), val = tensor([1, 16, 128, 64])]; tensor var_13384 = reshape(shape = var_13383, x = query_states_185)[name = string("op_13384")]; tensor var_13389 = const()[name = string("op_13389"), val = tensor([0, 1, 3, 2])]; tensor var_13394 = const()[name = string("op_13394"), val = tensor([1, 8, 128, 64])]; tensor var_13395 = reshape(shape = var_13394, x = key_states_231)[name = string("op_13395")]; tensor var_13400 = const()[name = string("op_13400"), val = tensor([0, 1, 3, 2])]; tensor var_13405 = const()[name = string("op_13405"), val = tensor([1, 8, 128, 64])]; tensor var_13406 = reshape(shape = var_13405, x = value_states_185)[name = string("op_13406")]; tensor var_13411 = const()[name = string("op_13411"), val = tensor([0, 1, 3, 2])]; tensor mean_187_axes_0 = const()[name = string("mean_187_axes_0"), val = tensor([-1])]; bool mean_187_keep_dims_0 = const()[name = string("mean_187_keep_dims_0"), val = bool(true)]; tensor x_461 = transpose(perm = var_13389, x = var_13384)[name = string("transpose_43")]; tensor mean_187 = reduce_mean(axes = mean_187_axes_0, keep_dims = mean_187_keep_dims_0, x = x_461)[name = string("mean_187")]; tensor input_419 = sub(x = x_461, y = mean_187)[name = string("input_419")]; tensor var_13428_axes_0 = const()[name = string("op_13428_axes_0"), val = tensor([-1])]; tensor model_model_layers_23_self_attn_q_norm_weight_to_fp16 = const()[name = string("model_model_layers_23_self_attn_q_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1084036480)))]; fp16 var_13416_to_fp16 = const()[name = string("op_13416_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_13428_cast_fp16 = layer_norm(axes = var_13428_axes_0, epsilon = var_13416_to_fp16, gamma = model_model_layers_23_self_attn_q_norm_weight_to_fp16, x = input_419)[name = string("op_13428_cast_fp16")]; tensor mean_189_axes_0 = const()[name = string("mean_189_axes_0"), val = tensor([-1])]; bool mean_189_keep_dims_0 = const()[name = string("mean_189_keep_dims_0"), val = bool(true)]; tensor x_463 = transpose(perm = var_13400, x = var_13395)[name = string("transpose_42")]; tensor mean_189 = reduce_mean(axes = mean_189_axes_0, keep_dims = mean_189_keep_dims_0, x = x_463)[name = string("mean_189")]; tensor input_421 = sub(x = x_463, y = mean_189)[name = string("input_421")]; tensor var_13446_axes_0 = const()[name = string("op_13446_axes_0"), val = tensor([-1])]; tensor model_model_layers_23_self_attn_k_norm_weight_to_fp16 = const()[name = string("model_model_layers_23_self_attn_k_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1084036800)))]; fp16 var_13434_to_fp16 = const()[name = string("op_13434_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_13446_cast_fp16 = layer_norm(axes = var_13446_axes_0, epsilon = var_13434_to_fp16, gamma = model_model_layers_23_self_attn_k_norm_weight_to_fp16, x = input_421)[name = string("op_13446_cast_fp16")]; tensor var_13461 = mul(x = var_13428_cast_fp16, y = cos_5)[name = string("op_13461")]; tensor x1_93_begin_0 = const()[name = string("x1_93_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_93_end_0 = const()[name = string("x1_93_end_0"), val = tensor([1, 16, 64, 64])]; tensor x1_93_end_mask_0 = const()[name = string("x1_93_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_93 = slice_by_index(begin = x1_93_begin_0, end = x1_93_end_0, end_mask = x1_93_end_mask_0, x = var_13428_cast_fp16)[name = string("x1_93")]; tensor x2_93_begin_0 = const()[name = string("x2_93_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_93_end_0 = const()[name = string("x2_93_end_0"), val = tensor([1, 16, 64, 128])]; tensor x2_93_end_mask_0 = const()[name = string("x2_93_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_93 = slice_by_index(begin = x2_93_begin_0, end = x2_93_end_0, end_mask = x2_93_end_mask_0, x = var_13428_cast_fp16)[name = string("x2_93")]; fp16 const_513_promoted = const()[name = string("const_513_promoted"), val = fp16(-0x1p+0)]; tensor var_13482 = mul(x = x2_93, y = const_513_promoted)[name = string("op_13482")]; int32 var_13484 = const()[name = string("op_13484"), val = int32(-1)]; bool var_13485_interleave_0 = const()[name = string("op_13485_interleave_0"), val = bool(false)]; tensor var_13485 = concat(axis = var_13484, interleave = var_13485_interleave_0, values = (var_13482, x1_93))[name = string("op_13485")]; tensor var_13486 = mul(x = var_13485, y = sin_5)[name = string("op_13486")]; tensor query_states_187 = add(x = var_13461, y = var_13486)[name = string("query_states_187")]; tensor var_13489 = mul(x = var_13446_cast_fp16, y = cos_5)[name = string("op_13489")]; tensor x1_95_begin_0 = const()[name = string("x1_95_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_95_end_0 = const()[name = string("x1_95_end_0"), val = tensor([1, 8, 64, 64])]; tensor x1_95_end_mask_0 = const()[name = string("x1_95_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_95 = slice_by_index(begin = x1_95_begin_0, end = x1_95_end_0, end_mask = x1_95_end_mask_0, x = var_13446_cast_fp16)[name = string("x1_95")]; tensor x2_95_begin_0 = const()[name = string("x2_95_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_95_end_0 = const()[name = string("x2_95_end_0"), val = tensor([1, 8, 64, 128])]; tensor x2_95_end_mask_0 = const()[name = string("x2_95_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_95 = slice_by_index(begin = x2_95_begin_0, end = x2_95_end_0, end_mask = x2_95_end_mask_0, x = var_13446_cast_fp16)[name = string("x2_95")]; fp16 const_516_promoted = const()[name = string("const_516_promoted"), val = fp16(-0x1p+0)]; tensor var_13510 = mul(x = x2_95, y = const_516_promoted)[name = string("op_13510")]; int32 var_13512 = const()[name = string("op_13512"), val = int32(-1)]; bool var_13513_interleave_0 = const()[name = string("op_13513_interleave_0"), val = bool(false)]; tensor var_13513 = concat(axis = var_13512, interleave = var_13513_interleave_0, values = (var_13510, x1_95))[name = string("op_13513")]; tensor var_13514 = mul(x = var_13513, y = sin_5)[name = string("op_13514")]; tensor key_states_233 = add(x = var_13489, y = var_13514)[name = string("key_states_233")]; tensor expand_dims_276 = const()[name = string("expand_dims_276"), val = tensor([23])]; tensor expand_dims_277 = const()[name = string("expand_dims_277"), val = tensor([0])]; tensor expand_dims_279 = const()[name = string("expand_dims_279"), val = tensor([0])]; tensor expand_dims_280 = const()[name = string("expand_dims_280"), val = tensor([24])]; int32 concat_416_axis_0 = const()[name = string("concat_416_axis_0"), val = int32(0)]; bool concat_416_interleave_0 = const()[name = string("concat_416_interleave_0"), val = bool(false)]; tensor concat_416 = concat(axis = concat_416_axis_0, interleave = concat_416_interleave_0, values = (expand_dims_276, expand_dims_277, current_pos, expand_dims_279))[name = string("concat_416")]; tensor concat_417_values1_0 = const()[name = string("concat_417_values1_0"), val = tensor([0])]; tensor concat_417_values3_0 = const()[name = string("concat_417_values3_0"), val = tensor([0])]; int32 concat_417_axis_0 = const()[name = string("concat_417_axis_0"), val = int32(0)]; bool concat_417_interleave_0 = const()[name = string("concat_417_interleave_0"), val = bool(false)]; tensor concat_417 = concat(axis = concat_417_axis_0, interleave = concat_417_interleave_0, values = (expand_dims_280, concat_417_values1_0, var_1760, concat_417_values3_0))[name = string("concat_417")]; tensor model_model_kv_cache_0_internal_tensor_assign_47_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_47_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_47_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_47_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_47_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_47_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_47_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_47_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_47_cast_fp16 = slice_update(begin = concat_416, begin_mask = model_model_kv_cache_0_internal_tensor_assign_47_begin_mask_0, end = concat_417, end_mask = model_model_kv_cache_0_internal_tensor_assign_47_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_47_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_47_stride_0, update = key_states_233, x = coreml_update_state_101)[name = string("model_model_kv_cache_0_internal_tensor_assign_47_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_47_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_102_write_state")]; tensor coreml_update_state_102 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_102")]; tensor expand_dims_282 = const()[name = string("expand_dims_282"), val = tensor([51])]; tensor expand_dims_283 = const()[name = string("expand_dims_283"), val = tensor([0])]; tensor expand_dims_285 = const()[name = string("expand_dims_285"), val = tensor([0])]; tensor expand_dims_286 = const()[name = string("expand_dims_286"), val = tensor([52])]; int32 concat_420_axis_0 = const()[name = string("concat_420_axis_0"), val = int32(0)]; bool concat_420_interleave_0 = const()[name = string("concat_420_interleave_0"), val = bool(false)]; tensor concat_420 = concat(axis = concat_420_axis_0, interleave = concat_420_interleave_0, values = (expand_dims_282, expand_dims_283, current_pos, expand_dims_285))[name = string("concat_420")]; tensor concat_421_values1_0 = const()[name = string("concat_421_values1_0"), val = tensor([0])]; tensor concat_421_values3_0 = const()[name = string("concat_421_values3_0"), val = tensor([0])]; int32 concat_421_axis_0 = const()[name = string("concat_421_axis_0"), val = int32(0)]; bool concat_421_interleave_0 = const()[name = string("concat_421_interleave_0"), val = bool(false)]; tensor concat_421 = concat(axis = concat_421_axis_0, interleave = concat_421_interleave_0, values = (expand_dims_286, concat_421_values1_0, var_1760, concat_421_values3_0))[name = string("concat_421")]; tensor model_model_kv_cache_0_internal_tensor_assign_48_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_48_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_48_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_48_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_48_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_48_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_48_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_48_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_187 = transpose(perm = var_13411, x = var_13406)[name = string("transpose_41")]; tensor model_model_kv_cache_0_internal_tensor_assign_48_cast_fp16 = slice_update(begin = concat_420, begin_mask = model_model_kv_cache_0_internal_tensor_assign_48_begin_mask_0, end = concat_421, end_mask = model_model_kv_cache_0_internal_tensor_assign_48_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_48_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_48_stride_0, update = value_states_187, x = coreml_update_state_102)[name = string("model_model_kv_cache_0_internal_tensor_assign_48_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_48_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_103_write_state")]; tensor coreml_update_state_103 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_103")]; tensor var_13585_begin_0 = const()[name = string("op_13585_begin_0"), val = tensor([23, 0, 0, 0])]; tensor var_13585_end_0 = const()[name = string("op_13585_end_0"), val = tensor([24, 8, 1024, 128])]; tensor var_13585_end_mask_0 = const()[name = string("op_13585_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_13585_cast_fp16 = slice_by_index(begin = var_13585_begin_0, end = var_13585_end_0, end_mask = var_13585_end_mask_0, x = coreml_update_state_103)[name = string("op_13585_cast_fp16")]; tensor K_layer_cache_47_axes_0 = const()[name = string("K_layer_cache_47_axes_0"), val = tensor([0])]; tensor K_layer_cache_47_cast_fp16 = squeeze(axes = K_layer_cache_47_axes_0, x = var_13585_cast_fp16)[name = string("K_layer_cache_47_cast_fp16")]; tensor var_13592_begin_0 = const()[name = string("op_13592_begin_0"), val = tensor([51, 0, 0, 0])]; tensor var_13592_end_0 = const()[name = string("op_13592_end_0"), val = tensor([52, 8, 1024, 128])]; tensor var_13592_end_mask_0 = const()[name = string("op_13592_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_13592_cast_fp16 = slice_by_index(begin = var_13592_begin_0, end = var_13592_end_0, end_mask = var_13592_end_mask_0, x = coreml_update_state_103)[name = string("op_13592_cast_fp16")]; tensor V_layer_cache_47_axes_0 = const()[name = string("V_layer_cache_47_axes_0"), val = tensor([0])]; tensor V_layer_cache_47_cast_fp16 = squeeze(axes = V_layer_cache_47_axes_0, x = var_13592_cast_fp16)[name = string("V_layer_cache_47_cast_fp16")]; tensor x_467_axes_0 = const()[name = string("x_467_axes_0"), val = tensor([1])]; tensor x_467_cast_fp16 = expand_dims(axes = x_467_axes_0, x = K_layer_cache_47_cast_fp16)[name = string("x_467_cast_fp16")]; tensor var_13621 = const()[name = string("op_13621"), val = tensor([1, 2, 1, 1])]; tensor x_469_cast_fp16 = tile(reps = var_13621, x = x_467_cast_fp16)[name = string("x_469_cast_fp16")]; tensor var_13633 = const()[name = string("op_13633"), val = tensor([1, -1, 1024, 128])]; tensor key_states_237_cast_fp16 = reshape(shape = var_13633, x = x_469_cast_fp16)[name = string("key_states_237_cast_fp16")]; tensor x_473_axes_0 = const()[name = string("x_473_axes_0"), val = tensor([1])]; tensor x_473_cast_fp16 = expand_dims(axes = x_473_axes_0, x = V_layer_cache_47_cast_fp16)[name = string("x_473_cast_fp16")]; tensor var_13641 = const()[name = string("op_13641"), val = tensor([1, 2, 1, 1])]; tensor x_475_cast_fp16 = tile(reps = var_13641, x = x_473_cast_fp16)[name = string("x_475_cast_fp16")]; bool var_13668_transpose_x_0 = const()[name = string("op_13668_transpose_x_0"), val = bool(false)]; bool var_13668_transpose_y_0 = const()[name = string("op_13668_transpose_y_0"), val = bool(true)]; tensor var_13668 = matmul(transpose_x = var_13668_transpose_x_0, transpose_y = var_13668_transpose_y_0, x = query_states_187, y = key_states_237_cast_fp16)[name = string("op_13668")]; fp16 var_13669_to_fp16 = const()[name = string("op_13669_to_fp16"), val = fp16(0x1.6ap-4)]; tensor attn_weights_93_cast_fp16 = mul(x = var_13668, y = var_13669_to_fp16)[name = string("attn_weights_93_cast_fp16")]; tensor attn_weights_95_cast_fp16 = add(x = attn_weights_93_cast_fp16, y = causal_mask)[name = string("attn_weights_95_cast_fp16")]; int32 var_13704 = const()[name = string("op_13704"), val = int32(-1)]; tensor var_13706_cast_fp16 = softmax(axis = var_13704, x = attn_weights_95_cast_fp16)[name = string("op_13706_cast_fp16")]; tensor concat_426 = const()[name = string("concat_426"), val = tensor([16, 64, 1024])]; tensor reshape_69_cast_fp16 = reshape(shape = concat_426, x = var_13706_cast_fp16)[name = string("reshape_69_cast_fp16")]; tensor concat_427 = const()[name = string("concat_427"), val = tensor([16, 1024, 128])]; tensor reshape_70_cast_fp16 = reshape(shape = concat_427, x = x_475_cast_fp16)[name = string("reshape_70_cast_fp16")]; bool matmul_23_transpose_x_0 = const()[name = string("matmul_23_transpose_x_0"), val = bool(false)]; bool matmul_23_transpose_y_0 = const()[name = string("matmul_23_transpose_y_0"), val = bool(false)]; tensor matmul_23_cast_fp16 = matmul(transpose_x = matmul_23_transpose_x_0, transpose_y = matmul_23_transpose_y_0, x = reshape_69_cast_fp16, y = reshape_70_cast_fp16)[name = string("matmul_23_cast_fp16")]; tensor concat_431 = const()[name = string("concat_431"), val = tensor([1, 16, 64, 128])]; tensor reshape_71_cast_fp16 = reshape(shape = concat_431, x = matmul_23_cast_fp16)[name = string("reshape_71_cast_fp16")]; tensor var_13718_perm_0 = const()[name = string("op_13718_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_13737 = const()[name = string("op_13737"), val = tensor([1, 64, 2048])]; tensor var_13718_cast_fp16 = transpose(perm = var_13718_perm_0, x = reshape_71_cast_fp16)[name = string("transpose_40")]; tensor attn_output_235_cast_fp16 = reshape(shape = var_13737, x = var_13718_cast_fp16)[name = string("attn_output_235_cast_fp16")]; tensor var_13742 = const()[name = string("op_13742"), val = tensor([0, 2, 1])]; string var_13758_pad_type_0 = const()[name = string("op_13758_pad_type_0"), val = string("valid")]; int32 var_13758_groups_0 = const()[name = string("op_13758_groups_0"), val = int32(1)]; tensor var_13758_strides_0 = const()[name = string("op_13758_strides_0"), val = tensor([1])]; tensor var_13758_pad_0 = const()[name = string("op_13758_pad_0"), val = tensor([0, 0])]; tensor var_13758_dilations_0 = const()[name = string("op_13758_dilations_0"), val = tensor([1])]; tensor squeeze_23_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1084037120))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1087182912))))[name = string("squeeze_23_cast_fp16_to_fp32_to_fp16_palettized")]; tensor var_13743_cast_fp16 = transpose(perm = var_13742, x = attn_output_235_cast_fp16)[name = string("transpose_39")]; tensor var_13758_cast_fp16 = conv(dilations = var_13758_dilations_0, groups = var_13758_groups_0, pad = var_13758_pad_0, pad_type = var_13758_pad_type_0, strides = var_13758_strides_0, weight = squeeze_23_cast_fp16_to_fp32_to_fp16_palettized, x = var_13743_cast_fp16)[name = string("op_13758_cast_fp16")]; tensor var_13762 = const()[name = string("op_13762"), val = tensor([0, 2, 1])]; tensor attn_output_239_cast_fp16 = transpose(perm = var_13762, x = var_13758_cast_fp16)[name = string("transpose_38")]; tensor hidden_states_143_cast_fp16 = add(x = hidden_states_139_cast_fp16, y = attn_output_239_cast_fp16)[name = string("hidden_states_143_cast_fp16")]; tensor mean_191_axes_0 = const()[name = string("mean_191_axes_0"), val = tensor([-1])]; bool mean_191_keep_dims_0 = const()[name = string("mean_191_keep_dims_0"), val = bool(true)]; tensor mean_191_cast_fp16 = reduce_mean(axes = mean_191_axes_0, keep_dims = mean_191_keep_dims_0, x = hidden_states_143_cast_fp16)[name = string("mean_191_cast_fp16")]; tensor input_425_cast_fp16 = sub(x = hidden_states_143_cast_fp16, y = mean_191_cast_fp16)[name = string("input_425_cast_fp16")]; tensor var_13781_axes_0 = const()[name = string("op_13781_axes_0"), val = tensor([-1])]; tensor model_model_layers_23_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_23_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1087215744)))]; fp16 var_13769_to_fp16 = const()[name = string("op_13769_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_13781_cast_fp16 = layer_norm(axes = var_13781_axes_0, epsilon = var_13769_to_fp16, gamma = model_model_layers_23_post_attention_layernorm_weight_to_fp16, x = input_425_cast_fp16)[name = string("op_13781_cast_fp16")]; tensor var_13795 = const()[name = string("op_13795"), val = tensor([0, 2, 1])]; tensor input_427_axes_0 = const()[name = string("input_427_axes_0"), val = tensor([2])]; tensor var_13796 = transpose(perm = var_13795, x = var_13781_cast_fp16)[name = string("transpose_37")]; tensor input_427 = expand_dims(axes = input_427_axes_0, x = var_13796)[name = string("input_427")]; string input_429_pad_type_0 = const()[name = string("input_429_pad_type_0"), val = string("valid")]; tensor input_429_strides_0 = const()[name = string("input_429_strides_0"), val = tensor([1, 1])]; tensor input_429_pad_0 = const()[name = string("input_429_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_429_dilations_0 = const()[name = string("input_429_dilations_0"), val = tensor([1, 1])]; int32 input_429_groups_0 = const()[name = string("input_429_groups_0"), val = int32(1)]; tensor input_429 = conv(dilations = input_429_dilations_0, groups = input_429_groups_0, pad = input_429_pad_0, pad_type = input_429_pad_type_0, strides = input_429_strides_0, weight = model_model_layers_23_mlp_gate_proj_weight_palettized, x = input_427)[name = string("input_429")]; string b_47_pad_type_0 = const()[name = string("b_47_pad_type_0"), val = string("valid")]; tensor b_47_strides_0 = const()[name = string("b_47_strides_0"), val = tensor([1, 1])]; tensor b_47_pad_0 = const()[name = string("b_47_pad_0"), val = tensor([0, 0, 0, 0])]; tensor b_47_dilations_0 = const()[name = string("b_47_dilations_0"), val = tensor([1, 1])]; int32 b_47_groups_0 = const()[name = string("b_47_groups_0"), val = int32(1)]; tensor b_47 = conv(dilations = b_47_dilations_0, groups = b_47_groups_0, pad = b_47_pad_0, pad_type = b_47_pad_type_0, strides = b_47_strides_0, weight = model_model_layers_23_mlp_up_proj_weight_palettized, x = input_427)[name = string("b_47")]; tensor c_47 = silu(x = input_429)[name = string("c_47")]; tensor input_431 = mul(x = c_47, y = b_47)[name = string("input_431")]; string e_47_pad_type_0 = const()[name = string("e_47_pad_type_0"), val = string("valid")]; tensor e_47_strides_0 = const()[name = string("e_47_strides_0"), val = tensor([1, 1])]; tensor e_47_pad_0 = const()[name = string("e_47_pad_0"), val = tensor([0, 0, 0, 0])]; tensor e_47_dilations_0 = const()[name = string("e_47_dilations_0"), val = tensor([1, 1])]; int32 e_47_groups_0 = const()[name = string("e_47_groups_0"), val = int32(1)]; tensor e_47 = conv(dilations = e_47_dilations_0, groups = e_47_groups_0, pad = e_47_pad_0, pad_type = e_47_pad_type_0, strides = e_47_strides_0, weight = model_model_layers_23_mlp_down_proj_weight_palettized, x = input_431)[name = string("e_47")]; tensor var_13818_axes_0 = const()[name = string("op_13818_axes_0"), val = tensor([2])]; tensor var_13818 = squeeze(axes = var_13818_axes_0, x = e_47)[name = string("op_13818")]; tensor var_13819 = const()[name = string("op_13819"), val = tensor([0, 2, 1])]; tensor var_13820 = transpose(perm = var_13819, x = var_13818)[name = string("transpose_36")]; tensor hidden_states_145_cast_fp16 = add(x = hidden_states_143_cast_fp16, y = var_13820)[name = string("hidden_states_145_cast_fp16")]; tensor mean_193_axes_0 = const()[name = string("mean_193_axes_0"), val = tensor([-1])]; bool mean_193_keep_dims_0 = const()[name = string("mean_193_keep_dims_0"), val = bool(true)]; tensor mean_193_cast_fp16 = reduce_mean(axes = mean_193_axes_0, keep_dims = mean_193_keep_dims_0, x = hidden_states_145_cast_fp16)[name = string("mean_193_cast_fp16")]; tensor input_433_cast_fp16 = sub(x = hidden_states_145_cast_fp16, y = mean_193_cast_fp16)[name = string("input_433_cast_fp16")]; tensor var_13838_axes_0 = const()[name = string("op_13838_axes_0"), val = tensor([-1])]; tensor model_model_layers_24_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_24_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1087219904)))]; fp16 var_13826_to_fp16 = const()[name = string("op_13826_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_13838_cast_fp16 = layer_norm(axes = var_13838_axes_0, epsilon = var_13826_to_fp16, gamma = model_model_layers_24_input_layernorm_weight_to_fp16, x = input_433_cast_fp16)[name = string("op_13838_cast_fp16")]; tensor var_13850 = const()[name = string("op_13850"), val = tensor([0, 2, 1])]; tensor var_13853_axes_0 = const()[name = string("op_13853_axes_0"), val = tensor([2])]; tensor var_13851 = transpose(perm = var_13850, x = var_13838_cast_fp16)[name = string("transpose_35")]; tensor var_13853 = expand_dims(axes = var_13853_axes_0, x = var_13851)[name = string("op_13853")]; string query_states_193_pad_type_0 = const()[name = string("query_states_193_pad_type_0"), val = string("valid")]; tensor query_states_193_strides_0 = const()[name = string("query_states_193_strides_0"), val = tensor([1, 1])]; tensor query_states_193_pad_0 = const()[name = string("query_states_193_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_193_dilations_0 = const()[name = string("query_states_193_dilations_0"), val = tensor([1, 1])]; int32 query_states_193_groups_0 = const()[name = string("query_states_193_groups_0"), val = int32(1)]; tensor query_states_193 = conv(dilations = query_states_193_dilations_0, groups = query_states_193_groups_0, pad = query_states_193_pad_0, pad_type = query_states_193_pad_type_0, strides = query_states_193_strides_0, weight = model_model_layers_24_self_attn_q_proj_weight_palettized, x = var_13853)[name = string("query_states_193")]; string key_states_241_pad_type_0 = const()[name = string("key_states_241_pad_type_0"), val = string("valid")]; tensor key_states_241_strides_0 = const()[name = string("key_states_241_strides_0"), val = tensor([1, 1])]; tensor key_states_241_pad_0 = const()[name = string("key_states_241_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_241_dilations_0 = const()[name = string("key_states_241_dilations_0"), val = tensor([1, 1])]; int32 key_states_241_groups_0 = const()[name = string("key_states_241_groups_0"), val = int32(1)]; tensor key_states_241 = conv(dilations = key_states_241_dilations_0, groups = key_states_241_groups_0, pad = key_states_241_pad_0, pad_type = key_states_241_pad_type_0, strides = key_states_241_strides_0, weight = model_model_layers_24_self_attn_k_proj_weight_palettized, x = var_13853)[name = string("key_states_241")]; string value_states_193_pad_type_0 = const()[name = string("value_states_193_pad_type_0"), val = string("valid")]; tensor value_states_193_strides_0 = const()[name = string("value_states_193_strides_0"), val = tensor([1, 1])]; tensor value_states_193_pad_0 = const()[name = string("value_states_193_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_193_dilations_0 = const()[name = string("value_states_193_dilations_0"), val = tensor([1, 1])]; int32 value_states_193_groups_0 = const()[name = string("value_states_193_groups_0"), val = int32(1)]; tensor value_states_193 = conv(dilations = value_states_193_dilations_0, groups = value_states_193_groups_0, pad = value_states_193_pad_0, pad_type = value_states_193_pad_type_0, strides = value_states_193_strides_0, weight = model_model_layers_24_self_attn_v_proj_weight_palettized, x = var_13853)[name = string("value_states_193")]; tensor var_13895 = const()[name = string("op_13895"), val = tensor([1, 16, 128, 64])]; tensor var_13896 = reshape(shape = var_13895, x = query_states_193)[name = string("op_13896")]; tensor var_13901 = const()[name = string("op_13901"), val = tensor([0, 1, 3, 2])]; tensor var_13906 = const()[name = string("op_13906"), val = tensor([1, 8, 128, 64])]; tensor var_13907 = reshape(shape = var_13906, x = key_states_241)[name = string("op_13907")]; tensor var_13912 = const()[name = string("op_13912"), val = tensor([0, 1, 3, 2])]; tensor var_13917 = const()[name = string("op_13917"), val = tensor([1, 8, 128, 64])]; tensor var_13918 = reshape(shape = var_13917, x = value_states_193)[name = string("op_13918")]; tensor var_13923 = const()[name = string("op_13923"), val = tensor([0, 1, 3, 2])]; tensor mean_195_axes_0 = const()[name = string("mean_195_axes_0"), val = tensor([-1])]; bool mean_195_keep_dims_0 = const()[name = string("mean_195_keep_dims_0"), val = bool(true)]; tensor x_481 = transpose(perm = var_13901, x = var_13896)[name = string("transpose_34")]; tensor mean_195 = reduce_mean(axes = mean_195_axes_0, keep_dims = mean_195_keep_dims_0, x = x_481)[name = string("mean_195")]; tensor input_437 = sub(x = x_481, y = mean_195)[name = string("input_437")]; tensor var_13940_axes_0 = const()[name = string("op_13940_axes_0"), val = tensor([-1])]; tensor model_model_layers_24_self_attn_q_norm_weight_to_fp16 = const()[name = string("model_model_layers_24_self_attn_q_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1087224064)))]; fp16 var_13928_to_fp16 = const()[name = string("op_13928_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_13940_cast_fp16 = layer_norm(axes = var_13940_axes_0, epsilon = var_13928_to_fp16, gamma = model_model_layers_24_self_attn_q_norm_weight_to_fp16, x = input_437)[name = string("op_13940_cast_fp16")]; tensor mean_197_axes_0 = const()[name = string("mean_197_axes_0"), val = tensor([-1])]; bool mean_197_keep_dims_0 = const()[name = string("mean_197_keep_dims_0"), val = bool(true)]; tensor x_483 = transpose(perm = var_13912, x = var_13907)[name = string("transpose_33")]; tensor mean_197 = reduce_mean(axes = mean_197_axes_0, keep_dims = mean_197_keep_dims_0, x = x_483)[name = string("mean_197")]; tensor input_439 = sub(x = x_483, y = mean_197)[name = string("input_439")]; tensor var_13958_axes_0 = const()[name = string("op_13958_axes_0"), val = tensor([-1])]; tensor model_model_layers_24_self_attn_k_norm_weight_to_fp16 = const()[name = string("model_model_layers_24_self_attn_k_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1087224384)))]; fp16 var_13946_to_fp16 = const()[name = string("op_13946_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_13958_cast_fp16 = layer_norm(axes = var_13958_axes_0, epsilon = var_13946_to_fp16, gamma = model_model_layers_24_self_attn_k_norm_weight_to_fp16, x = input_439)[name = string("op_13958_cast_fp16")]; tensor var_13973 = mul(x = var_13940_cast_fp16, y = cos_5)[name = string("op_13973")]; tensor x1_97_begin_0 = const()[name = string("x1_97_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_97_end_0 = const()[name = string("x1_97_end_0"), val = tensor([1, 16, 64, 64])]; tensor x1_97_end_mask_0 = const()[name = string("x1_97_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_97 = slice_by_index(begin = x1_97_begin_0, end = x1_97_end_0, end_mask = x1_97_end_mask_0, x = var_13940_cast_fp16)[name = string("x1_97")]; tensor x2_97_begin_0 = const()[name = string("x2_97_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_97_end_0 = const()[name = string("x2_97_end_0"), val = tensor([1, 16, 64, 128])]; tensor x2_97_end_mask_0 = const()[name = string("x2_97_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_97 = slice_by_index(begin = x2_97_begin_0, end = x2_97_end_0, end_mask = x2_97_end_mask_0, x = var_13940_cast_fp16)[name = string("x2_97")]; fp16 const_535_promoted = const()[name = string("const_535_promoted"), val = fp16(-0x1p+0)]; tensor var_13994 = mul(x = x2_97, y = const_535_promoted)[name = string("op_13994")]; int32 var_13996 = const()[name = string("op_13996"), val = int32(-1)]; bool var_13997_interleave_0 = const()[name = string("op_13997_interleave_0"), val = bool(false)]; tensor var_13997 = concat(axis = var_13996, interleave = var_13997_interleave_0, values = (var_13994, x1_97))[name = string("op_13997")]; tensor var_13998 = mul(x = var_13997, y = sin_5)[name = string("op_13998")]; tensor query_states_195 = add(x = var_13973, y = var_13998)[name = string("query_states_195")]; tensor var_14001 = mul(x = var_13958_cast_fp16, y = cos_5)[name = string("op_14001")]; tensor x1_99_begin_0 = const()[name = string("x1_99_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_99_end_0 = const()[name = string("x1_99_end_0"), val = tensor([1, 8, 64, 64])]; tensor x1_99_end_mask_0 = const()[name = string("x1_99_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_99 = slice_by_index(begin = x1_99_begin_0, end = x1_99_end_0, end_mask = x1_99_end_mask_0, x = var_13958_cast_fp16)[name = string("x1_99")]; tensor x2_99_begin_0 = const()[name = string("x2_99_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_99_end_0 = const()[name = string("x2_99_end_0"), val = tensor([1, 8, 64, 128])]; tensor x2_99_end_mask_0 = const()[name = string("x2_99_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_99 = slice_by_index(begin = x2_99_begin_0, end = x2_99_end_0, end_mask = x2_99_end_mask_0, x = var_13958_cast_fp16)[name = string("x2_99")]; fp16 const_538_promoted = const()[name = string("const_538_promoted"), val = fp16(-0x1p+0)]; tensor var_14022 = mul(x = x2_99, y = const_538_promoted)[name = string("op_14022")]; int32 var_14024 = const()[name = string("op_14024"), val = int32(-1)]; bool var_14025_interleave_0 = const()[name = string("op_14025_interleave_0"), val = bool(false)]; tensor var_14025 = concat(axis = var_14024, interleave = var_14025_interleave_0, values = (var_14022, x1_99))[name = string("op_14025")]; tensor var_14026 = mul(x = var_14025, y = sin_5)[name = string("op_14026")]; tensor key_states_243 = add(x = var_14001, y = var_14026)[name = string("key_states_243")]; tensor expand_dims_288 = const()[name = string("expand_dims_288"), val = tensor([24])]; tensor expand_dims_289 = const()[name = string("expand_dims_289"), val = tensor([0])]; tensor expand_dims_291 = const()[name = string("expand_dims_291"), val = tensor([0])]; tensor expand_dims_292 = const()[name = string("expand_dims_292"), val = tensor([25])]; int32 concat_434_axis_0 = const()[name = string("concat_434_axis_0"), val = int32(0)]; bool concat_434_interleave_0 = const()[name = string("concat_434_interleave_0"), val = bool(false)]; tensor concat_434 = concat(axis = concat_434_axis_0, interleave = concat_434_interleave_0, values = (expand_dims_288, expand_dims_289, current_pos, expand_dims_291))[name = string("concat_434")]; tensor concat_435_values1_0 = const()[name = string("concat_435_values1_0"), val = tensor([0])]; tensor concat_435_values3_0 = const()[name = string("concat_435_values3_0"), val = tensor([0])]; int32 concat_435_axis_0 = const()[name = string("concat_435_axis_0"), val = int32(0)]; bool concat_435_interleave_0 = const()[name = string("concat_435_interleave_0"), val = bool(false)]; tensor concat_435 = concat(axis = concat_435_axis_0, interleave = concat_435_interleave_0, values = (expand_dims_292, concat_435_values1_0, var_1760, concat_435_values3_0))[name = string("concat_435")]; tensor model_model_kv_cache_0_internal_tensor_assign_49_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_49_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_49_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_49_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_49_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_49_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_49_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_49_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_49_cast_fp16 = slice_update(begin = concat_434, begin_mask = model_model_kv_cache_0_internal_tensor_assign_49_begin_mask_0, end = concat_435, end_mask = model_model_kv_cache_0_internal_tensor_assign_49_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_49_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_49_stride_0, update = key_states_243, x = coreml_update_state_103)[name = string("model_model_kv_cache_0_internal_tensor_assign_49_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_49_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_104_write_state")]; tensor coreml_update_state_104 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_104")]; tensor expand_dims_294 = const()[name = string("expand_dims_294"), val = tensor([52])]; tensor expand_dims_295 = const()[name = string("expand_dims_295"), val = tensor([0])]; tensor expand_dims_297 = const()[name = string("expand_dims_297"), val = tensor([0])]; tensor expand_dims_298 = const()[name = string("expand_dims_298"), val = tensor([53])]; int32 concat_438_axis_0 = const()[name = string("concat_438_axis_0"), val = int32(0)]; bool concat_438_interleave_0 = const()[name = string("concat_438_interleave_0"), val = bool(false)]; tensor concat_438 = concat(axis = concat_438_axis_0, interleave = concat_438_interleave_0, values = (expand_dims_294, expand_dims_295, current_pos, expand_dims_297))[name = string("concat_438")]; tensor concat_439_values1_0 = const()[name = string("concat_439_values1_0"), val = tensor([0])]; tensor concat_439_values3_0 = const()[name = string("concat_439_values3_0"), val = tensor([0])]; int32 concat_439_axis_0 = const()[name = string("concat_439_axis_0"), val = int32(0)]; bool concat_439_interleave_0 = const()[name = string("concat_439_interleave_0"), val = bool(false)]; tensor concat_439 = concat(axis = concat_439_axis_0, interleave = concat_439_interleave_0, values = (expand_dims_298, concat_439_values1_0, var_1760, concat_439_values3_0))[name = string("concat_439")]; tensor model_model_kv_cache_0_internal_tensor_assign_50_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_50_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_50_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_50_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_50_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_50_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_50_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_50_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_195 = transpose(perm = var_13923, x = var_13918)[name = string("transpose_32")]; tensor model_model_kv_cache_0_internal_tensor_assign_50_cast_fp16 = slice_update(begin = concat_438, begin_mask = model_model_kv_cache_0_internal_tensor_assign_50_begin_mask_0, end = concat_439, end_mask = model_model_kv_cache_0_internal_tensor_assign_50_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_50_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_50_stride_0, update = value_states_195, x = coreml_update_state_104)[name = string("model_model_kv_cache_0_internal_tensor_assign_50_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_50_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_105_write_state")]; tensor coreml_update_state_105 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_105")]; tensor var_14097_begin_0 = const()[name = string("op_14097_begin_0"), val = tensor([24, 0, 0, 0])]; tensor var_14097_end_0 = const()[name = string("op_14097_end_0"), val = tensor([25, 8, 1024, 128])]; tensor var_14097_end_mask_0 = const()[name = string("op_14097_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_14097_cast_fp16 = slice_by_index(begin = var_14097_begin_0, end = var_14097_end_0, end_mask = var_14097_end_mask_0, x = coreml_update_state_105)[name = string("op_14097_cast_fp16")]; tensor K_layer_cache_49_axes_0 = const()[name = string("K_layer_cache_49_axes_0"), val = tensor([0])]; tensor K_layer_cache_49_cast_fp16 = squeeze(axes = K_layer_cache_49_axes_0, x = var_14097_cast_fp16)[name = string("K_layer_cache_49_cast_fp16")]; tensor var_14104_begin_0 = const()[name = string("op_14104_begin_0"), val = tensor([52, 0, 0, 0])]; tensor var_14104_end_0 = const()[name = string("op_14104_end_0"), val = tensor([53, 8, 1024, 128])]; tensor var_14104_end_mask_0 = const()[name = string("op_14104_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_14104_cast_fp16 = slice_by_index(begin = var_14104_begin_0, end = var_14104_end_0, end_mask = var_14104_end_mask_0, x = coreml_update_state_105)[name = string("op_14104_cast_fp16")]; tensor V_layer_cache_49_axes_0 = const()[name = string("V_layer_cache_49_axes_0"), val = tensor([0])]; tensor V_layer_cache_49_cast_fp16 = squeeze(axes = V_layer_cache_49_axes_0, x = var_14104_cast_fp16)[name = string("V_layer_cache_49_cast_fp16")]; tensor x_487_axes_0 = const()[name = string("x_487_axes_0"), val = tensor([1])]; tensor x_487_cast_fp16 = expand_dims(axes = x_487_axes_0, x = K_layer_cache_49_cast_fp16)[name = string("x_487_cast_fp16")]; tensor var_14133 = const()[name = string("op_14133"), val = tensor([1, 2, 1, 1])]; tensor x_489_cast_fp16 = tile(reps = var_14133, x = x_487_cast_fp16)[name = string("x_489_cast_fp16")]; tensor var_14145 = const()[name = string("op_14145"), val = tensor([1, -1, 1024, 128])]; tensor key_states_247_cast_fp16 = reshape(shape = var_14145, x = x_489_cast_fp16)[name = string("key_states_247_cast_fp16")]; tensor x_493_axes_0 = const()[name = string("x_493_axes_0"), val = tensor([1])]; tensor x_493_cast_fp16 = expand_dims(axes = x_493_axes_0, x = V_layer_cache_49_cast_fp16)[name = string("x_493_cast_fp16")]; tensor var_14153 = const()[name = string("op_14153"), val = tensor([1, 2, 1, 1])]; tensor x_495_cast_fp16 = tile(reps = var_14153, x = x_493_cast_fp16)[name = string("x_495_cast_fp16")]; bool var_14180_transpose_x_0 = const()[name = string("op_14180_transpose_x_0"), val = bool(false)]; bool var_14180_transpose_y_0 = const()[name = string("op_14180_transpose_y_0"), val = bool(true)]; tensor var_14180 = matmul(transpose_x = var_14180_transpose_x_0, transpose_y = var_14180_transpose_y_0, x = query_states_195, y = key_states_247_cast_fp16)[name = string("op_14180")]; fp16 var_14181_to_fp16 = const()[name = string("op_14181_to_fp16"), val = fp16(0x1.6ap-4)]; tensor attn_weights_97_cast_fp16 = mul(x = var_14180, y = var_14181_to_fp16)[name = string("attn_weights_97_cast_fp16")]; tensor attn_weights_99_cast_fp16 = add(x = attn_weights_97_cast_fp16, y = causal_mask)[name = string("attn_weights_99_cast_fp16")]; int32 var_14216 = const()[name = string("op_14216"), val = int32(-1)]; tensor var_14218_cast_fp16 = softmax(axis = var_14216, x = attn_weights_99_cast_fp16)[name = string("op_14218_cast_fp16")]; tensor concat_444 = const()[name = string("concat_444"), val = tensor([16, 64, 1024])]; tensor reshape_72_cast_fp16 = reshape(shape = concat_444, x = var_14218_cast_fp16)[name = string("reshape_72_cast_fp16")]; tensor concat_445 = const()[name = string("concat_445"), val = tensor([16, 1024, 128])]; tensor reshape_73_cast_fp16 = reshape(shape = concat_445, x = x_495_cast_fp16)[name = string("reshape_73_cast_fp16")]; bool matmul_24_transpose_x_0 = const()[name = string("matmul_24_transpose_x_0"), val = bool(false)]; bool matmul_24_transpose_y_0 = const()[name = string("matmul_24_transpose_y_0"), val = bool(false)]; tensor matmul_24_cast_fp16 = matmul(transpose_x = matmul_24_transpose_x_0, transpose_y = matmul_24_transpose_y_0, x = reshape_72_cast_fp16, y = reshape_73_cast_fp16)[name = string("matmul_24_cast_fp16")]; tensor concat_449 = const()[name = string("concat_449"), val = tensor([1, 16, 64, 128])]; tensor reshape_74_cast_fp16 = reshape(shape = concat_449, x = matmul_24_cast_fp16)[name = string("reshape_74_cast_fp16")]; tensor var_14230_perm_0 = const()[name = string("op_14230_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_14249 = const()[name = string("op_14249"), val = tensor([1, 64, 2048])]; tensor var_14230_cast_fp16 = transpose(perm = var_14230_perm_0, x = reshape_74_cast_fp16)[name = string("transpose_31")]; tensor attn_output_245_cast_fp16 = reshape(shape = var_14249, x = var_14230_cast_fp16)[name = string("attn_output_245_cast_fp16")]; tensor var_14254 = const()[name = string("op_14254"), val = tensor([0, 2, 1])]; string var_14270_pad_type_0 = const()[name = string("op_14270_pad_type_0"), val = string("valid")]; int32 var_14270_groups_0 = const()[name = string("op_14270_groups_0"), val = int32(1)]; tensor var_14270_strides_0 = const()[name = string("op_14270_strides_0"), val = tensor([1])]; tensor var_14270_pad_0 = const()[name = string("op_14270_pad_0"), val = tensor([0, 0])]; tensor var_14270_dilations_0 = const()[name = string("op_14270_dilations_0"), val = tensor([1])]; tensor squeeze_24_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1087224704))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1090370496))))[name = string("squeeze_24_cast_fp16_to_fp32_to_fp16_palettized")]; tensor var_14255_cast_fp16 = transpose(perm = var_14254, x = attn_output_245_cast_fp16)[name = string("transpose_30")]; tensor var_14270_cast_fp16 = conv(dilations = var_14270_dilations_0, groups = var_14270_groups_0, pad = var_14270_pad_0, pad_type = var_14270_pad_type_0, strides = var_14270_strides_0, weight = squeeze_24_cast_fp16_to_fp32_to_fp16_palettized, x = var_14255_cast_fp16)[name = string("op_14270_cast_fp16")]; tensor var_14274 = const()[name = string("op_14274"), val = tensor([0, 2, 1])]; tensor attn_output_249_cast_fp16 = transpose(perm = var_14274, x = var_14270_cast_fp16)[name = string("transpose_29")]; tensor hidden_states_149_cast_fp16 = add(x = hidden_states_145_cast_fp16, y = attn_output_249_cast_fp16)[name = string("hidden_states_149_cast_fp16")]; tensor mean_199_axes_0 = const()[name = string("mean_199_axes_0"), val = tensor([-1])]; bool mean_199_keep_dims_0 = const()[name = string("mean_199_keep_dims_0"), val = bool(true)]; tensor mean_199_cast_fp16 = reduce_mean(axes = mean_199_axes_0, keep_dims = mean_199_keep_dims_0, x = hidden_states_149_cast_fp16)[name = string("mean_199_cast_fp16")]; tensor input_443_cast_fp16 = sub(x = hidden_states_149_cast_fp16, y = mean_199_cast_fp16)[name = string("input_443_cast_fp16")]; tensor var_14293_axes_0 = const()[name = string("op_14293_axes_0"), val = tensor([-1])]; tensor model_model_layers_24_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_24_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1090403328)))]; fp16 var_14281_to_fp16 = const()[name = string("op_14281_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_14293_cast_fp16 = layer_norm(axes = var_14293_axes_0, epsilon = var_14281_to_fp16, gamma = model_model_layers_24_post_attention_layernorm_weight_to_fp16, x = input_443_cast_fp16)[name = string("op_14293_cast_fp16")]; tensor var_14307 = const()[name = string("op_14307"), val = tensor([0, 2, 1])]; tensor input_445_axes_0 = const()[name = string("input_445_axes_0"), val = tensor([2])]; tensor var_14308 = transpose(perm = var_14307, x = var_14293_cast_fp16)[name = string("transpose_28")]; tensor input_445 = expand_dims(axes = input_445_axes_0, x = var_14308)[name = string("input_445")]; string input_447_pad_type_0 = const()[name = string("input_447_pad_type_0"), val = string("valid")]; tensor input_447_strides_0 = const()[name = string("input_447_strides_0"), val = tensor([1, 1])]; tensor input_447_pad_0 = const()[name = string("input_447_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_447_dilations_0 = const()[name = string("input_447_dilations_0"), val = tensor([1, 1])]; int32 input_447_groups_0 = const()[name = string("input_447_groups_0"), val = int32(1)]; tensor input_447 = conv(dilations = input_447_dilations_0, groups = input_447_groups_0, pad = input_447_pad_0, pad_type = input_447_pad_type_0, strides = input_447_strides_0, weight = model_model_layers_24_mlp_gate_proj_weight_palettized, x = input_445)[name = string("input_447")]; string b_49_pad_type_0 = const()[name = string("b_49_pad_type_0"), val = string("valid")]; tensor b_49_strides_0 = const()[name = string("b_49_strides_0"), val = tensor([1, 1])]; tensor b_49_pad_0 = const()[name = string("b_49_pad_0"), val = tensor([0, 0, 0, 0])]; tensor b_49_dilations_0 = const()[name = string("b_49_dilations_0"), val = tensor([1, 1])]; int32 b_49_groups_0 = const()[name = string("b_49_groups_0"), val = int32(1)]; tensor b_49 = conv(dilations = b_49_dilations_0, groups = b_49_groups_0, pad = b_49_pad_0, pad_type = b_49_pad_type_0, strides = b_49_strides_0, weight = model_model_layers_24_mlp_up_proj_weight_palettized, x = input_445)[name = string("b_49")]; tensor c_49 = silu(x = input_447)[name = string("c_49")]; tensor input_449 = mul(x = c_49, y = b_49)[name = string("input_449")]; string e_49_pad_type_0 = const()[name = string("e_49_pad_type_0"), val = string("valid")]; tensor e_49_strides_0 = const()[name = string("e_49_strides_0"), val = tensor([1, 1])]; tensor e_49_pad_0 = const()[name = string("e_49_pad_0"), val = tensor([0, 0, 0, 0])]; tensor e_49_dilations_0 = const()[name = string("e_49_dilations_0"), val = tensor([1, 1])]; int32 e_49_groups_0 = const()[name = string("e_49_groups_0"), val = int32(1)]; tensor e_49 = conv(dilations = e_49_dilations_0, groups = e_49_groups_0, pad = e_49_pad_0, pad_type = e_49_pad_type_0, strides = e_49_strides_0, weight = model_model_layers_24_mlp_down_proj_weight_palettized, x = input_449)[name = string("e_49")]; tensor var_14330_axes_0 = const()[name = string("op_14330_axes_0"), val = tensor([2])]; tensor var_14330 = squeeze(axes = var_14330_axes_0, x = e_49)[name = string("op_14330")]; tensor var_14331 = const()[name = string("op_14331"), val = tensor([0, 2, 1])]; tensor var_14332 = transpose(perm = var_14331, x = var_14330)[name = string("transpose_27")]; tensor hidden_states_151_cast_fp16 = add(x = hidden_states_149_cast_fp16, y = var_14332)[name = string("hidden_states_151_cast_fp16")]; tensor mean_201_axes_0 = const()[name = string("mean_201_axes_0"), val = tensor([-1])]; bool mean_201_keep_dims_0 = const()[name = string("mean_201_keep_dims_0"), val = bool(true)]; tensor mean_201_cast_fp16 = reduce_mean(axes = mean_201_axes_0, keep_dims = mean_201_keep_dims_0, x = hidden_states_151_cast_fp16)[name = string("mean_201_cast_fp16")]; tensor input_451_cast_fp16 = sub(x = hidden_states_151_cast_fp16, y = mean_201_cast_fp16)[name = string("input_451_cast_fp16")]; tensor var_14350_axes_0 = const()[name = string("op_14350_axes_0"), val = tensor([-1])]; tensor model_model_layers_25_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_25_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1090407488)))]; fp16 var_14338_to_fp16 = const()[name = string("op_14338_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_14350_cast_fp16 = layer_norm(axes = var_14350_axes_0, epsilon = var_14338_to_fp16, gamma = model_model_layers_25_input_layernorm_weight_to_fp16, x = input_451_cast_fp16)[name = string("op_14350_cast_fp16")]; tensor var_14362 = const()[name = string("op_14362"), val = tensor([0, 2, 1])]; tensor var_14365_axes_0 = const()[name = string("op_14365_axes_0"), val = tensor([2])]; tensor var_14363 = transpose(perm = var_14362, x = var_14350_cast_fp16)[name = string("transpose_26")]; tensor var_14365 = expand_dims(axes = var_14365_axes_0, x = var_14363)[name = string("op_14365")]; string query_states_201_pad_type_0 = const()[name = string("query_states_201_pad_type_0"), val = string("valid")]; tensor query_states_201_strides_0 = const()[name = string("query_states_201_strides_0"), val = tensor([1, 1])]; tensor query_states_201_pad_0 = const()[name = string("query_states_201_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_201_dilations_0 = const()[name = string("query_states_201_dilations_0"), val = tensor([1, 1])]; int32 query_states_201_groups_0 = const()[name = string("query_states_201_groups_0"), val = int32(1)]; tensor query_states_201 = conv(dilations = query_states_201_dilations_0, groups = query_states_201_groups_0, pad = query_states_201_pad_0, pad_type = query_states_201_pad_type_0, strides = query_states_201_strides_0, weight = model_model_layers_25_self_attn_q_proj_weight_palettized, x = var_14365)[name = string("query_states_201")]; string key_states_251_pad_type_0 = const()[name = string("key_states_251_pad_type_0"), val = string("valid")]; tensor key_states_251_strides_0 = const()[name = string("key_states_251_strides_0"), val = tensor([1, 1])]; tensor key_states_251_pad_0 = const()[name = string("key_states_251_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_251_dilations_0 = const()[name = string("key_states_251_dilations_0"), val = tensor([1, 1])]; int32 key_states_251_groups_0 = const()[name = string("key_states_251_groups_0"), val = int32(1)]; tensor key_states_251 = conv(dilations = key_states_251_dilations_0, groups = key_states_251_groups_0, pad = key_states_251_pad_0, pad_type = key_states_251_pad_type_0, strides = key_states_251_strides_0, weight = model_model_layers_25_self_attn_k_proj_weight_palettized, x = var_14365)[name = string("key_states_251")]; string value_states_201_pad_type_0 = const()[name = string("value_states_201_pad_type_0"), val = string("valid")]; tensor value_states_201_strides_0 = const()[name = string("value_states_201_strides_0"), val = tensor([1, 1])]; tensor value_states_201_pad_0 = const()[name = string("value_states_201_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_201_dilations_0 = const()[name = string("value_states_201_dilations_0"), val = tensor([1, 1])]; int32 value_states_201_groups_0 = const()[name = string("value_states_201_groups_0"), val = int32(1)]; tensor value_states_201 = conv(dilations = value_states_201_dilations_0, groups = value_states_201_groups_0, pad = value_states_201_pad_0, pad_type = value_states_201_pad_type_0, strides = value_states_201_strides_0, weight = model_model_layers_25_self_attn_v_proj_weight_palettized, x = var_14365)[name = string("value_states_201")]; tensor var_14407 = const()[name = string("op_14407"), val = tensor([1, 16, 128, 64])]; tensor var_14408 = reshape(shape = var_14407, x = query_states_201)[name = string("op_14408")]; tensor var_14413 = const()[name = string("op_14413"), val = tensor([0, 1, 3, 2])]; tensor var_14418 = const()[name = string("op_14418"), val = tensor([1, 8, 128, 64])]; tensor var_14419 = reshape(shape = var_14418, x = key_states_251)[name = string("op_14419")]; tensor var_14424 = const()[name = string("op_14424"), val = tensor([0, 1, 3, 2])]; tensor var_14429 = const()[name = string("op_14429"), val = tensor([1, 8, 128, 64])]; tensor var_14430 = reshape(shape = var_14429, x = value_states_201)[name = string("op_14430")]; tensor var_14435 = const()[name = string("op_14435"), val = tensor([0, 1, 3, 2])]; tensor mean_203_axes_0 = const()[name = string("mean_203_axes_0"), val = tensor([-1])]; bool mean_203_keep_dims_0 = const()[name = string("mean_203_keep_dims_0"), val = bool(true)]; tensor x_501 = transpose(perm = var_14413, x = var_14408)[name = string("transpose_25")]; tensor mean_203 = reduce_mean(axes = mean_203_axes_0, keep_dims = mean_203_keep_dims_0, x = x_501)[name = string("mean_203")]; tensor input_455 = sub(x = x_501, y = mean_203)[name = string("input_455")]; tensor var_14452_axes_0 = const()[name = string("op_14452_axes_0"), val = tensor([-1])]; tensor model_model_layers_25_self_attn_q_norm_weight_to_fp16 = const()[name = string("model_model_layers_25_self_attn_q_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1090411648)))]; fp16 var_14440_to_fp16 = const()[name = string("op_14440_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_14452_cast_fp16 = layer_norm(axes = var_14452_axes_0, epsilon = var_14440_to_fp16, gamma = model_model_layers_25_self_attn_q_norm_weight_to_fp16, x = input_455)[name = string("op_14452_cast_fp16")]; tensor mean_205_axes_0 = const()[name = string("mean_205_axes_0"), val = tensor([-1])]; bool mean_205_keep_dims_0 = const()[name = string("mean_205_keep_dims_0"), val = bool(true)]; tensor x_503 = transpose(perm = var_14424, x = var_14419)[name = string("transpose_24")]; tensor mean_205 = reduce_mean(axes = mean_205_axes_0, keep_dims = mean_205_keep_dims_0, x = x_503)[name = string("mean_205")]; tensor input_457 = sub(x = x_503, y = mean_205)[name = string("input_457")]; tensor var_14470_axes_0 = const()[name = string("op_14470_axes_0"), val = tensor([-1])]; tensor model_model_layers_25_self_attn_k_norm_weight_to_fp16 = const()[name = string("model_model_layers_25_self_attn_k_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1090411968)))]; fp16 var_14458_to_fp16 = const()[name = string("op_14458_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_14470_cast_fp16 = layer_norm(axes = var_14470_axes_0, epsilon = var_14458_to_fp16, gamma = model_model_layers_25_self_attn_k_norm_weight_to_fp16, x = input_457)[name = string("op_14470_cast_fp16")]; tensor var_14485 = mul(x = var_14452_cast_fp16, y = cos_5)[name = string("op_14485")]; tensor x1_101_begin_0 = const()[name = string("x1_101_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_101_end_0 = const()[name = string("x1_101_end_0"), val = tensor([1, 16, 64, 64])]; tensor x1_101_end_mask_0 = const()[name = string("x1_101_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_101 = slice_by_index(begin = x1_101_begin_0, end = x1_101_end_0, end_mask = x1_101_end_mask_0, x = var_14452_cast_fp16)[name = string("x1_101")]; tensor x2_101_begin_0 = const()[name = string("x2_101_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_101_end_0 = const()[name = string("x2_101_end_0"), val = tensor([1, 16, 64, 128])]; tensor x2_101_end_mask_0 = const()[name = string("x2_101_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_101 = slice_by_index(begin = x2_101_begin_0, end = x2_101_end_0, end_mask = x2_101_end_mask_0, x = var_14452_cast_fp16)[name = string("x2_101")]; fp16 const_557_promoted = const()[name = string("const_557_promoted"), val = fp16(-0x1p+0)]; tensor var_14506 = mul(x = x2_101, y = const_557_promoted)[name = string("op_14506")]; int32 var_14508 = const()[name = string("op_14508"), val = int32(-1)]; bool var_14509_interleave_0 = const()[name = string("op_14509_interleave_0"), val = bool(false)]; tensor var_14509 = concat(axis = var_14508, interleave = var_14509_interleave_0, values = (var_14506, x1_101))[name = string("op_14509")]; tensor var_14510 = mul(x = var_14509, y = sin_5)[name = string("op_14510")]; tensor query_states_203 = add(x = var_14485, y = var_14510)[name = string("query_states_203")]; tensor var_14513 = mul(x = var_14470_cast_fp16, y = cos_5)[name = string("op_14513")]; tensor x1_103_begin_0 = const()[name = string("x1_103_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_103_end_0 = const()[name = string("x1_103_end_0"), val = tensor([1, 8, 64, 64])]; tensor x1_103_end_mask_0 = const()[name = string("x1_103_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_103 = slice_by_index(begin = x1_103_begin_0, end = x1_103_end_0, end_mask = x1_103_end_mask_0, x = var_14470_cast_fp16)[name = string("x1_103")]; tensor x2_103_begin_0 = const()[name = string("x2_103_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_103_end_0 = const()[name = string("x2_103_end_0"), val = tensor([1, 8, 64, 128])]; tensor x2_103_end_mask_0 = const()[name = string("x2_103_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_103 = slice_by_index(begin = x2_103_begin_0, end = x2_103_end_0, end_mask = x2_103_end_mask_0, x = var_14470_cast_fp16)[name = string("x2_103")]; fp16 const_560_promoted = const()[name = string("const_560_promoted"), val = fp16(-0x1p+0)]; tensor var_14534 = mul(x = x2_103, y = const_560_promoted)[name = string("op_14534")]; int32 var_14536 = const()[name = string("op_14536"), val = int32(-1)]; bool var_14537_interleave_0 = const()[name = string("op_14537_interleave_0"), val = bool(false)]; tensor var_14537 = concat(axis = var_14536, interleave = var_14537_interleave_0, values = (var_14534, x1_103))[name = string("op_14537")]; tensor var_14538 = mul(x = var_14537, y = sin_5)[name = string("op_14538")]; tensor key_states_253 = add(x = var_14513, y = var_14538)[name = string("key_states_253")]; tensor expand_dims_300 = const()[name = string("expand_dims_300"), val = tensor([25])]; tensor expand_dims_301 = const()[name = string("expand_dims_301"), val = tensor([0])]; tensor expand_dims_303 = const()[name = string("expand_dims_303"), val = tensor([0])]; tensor expand_dims_304 = const()[name = string("expand_dims_304"), val = tensor([26])]; int32 concat_452_axis_0 = const()[name = string("concat_452_axis_0"), val = int32(0)]; bool concat_452_interleave_0 = const()[name = string("concat_452_interleave_0"), val = bool(false)]; tensor concat_452 = concat(axis = concat_452_axis_0, interleave = concat_452_interleave_0, values = (expand_dims_300, expand_dims_301, current_pos, expand_dims_303))[name = string("concat_452")]; tensor concat_453_values1_0 = const()[name = string("concat_453_values1_0"), val = tensor([0])]; tensor concat_453_values3_0 = const()[name = string("concat_453_values3_0"), val = tensor([0])]; int32 concat_453_axis_0 = const()[name = string("concat_453_axis_0"), val = int32(0)]; bool concat_453_interleave_0 = const()[name = string("concat_453_interleave_0"), val = bool(false)]; tensor concat_453 = concat(axis = concat_453_axis_0, interleave = concat_453_interleave_0, values = (expand_dims_304, concat_453_values1_0, var_1760, concat_453_values3_0))[name = string("concat_453")]; tensor model_model_kv_cache_0_internal_tensor_assign_51_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_51_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_51_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_51_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_51_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_51_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_51_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_51_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_51_cast_fp16 = slice_update(begin = concat_452, begin_mask = model_model_kv_cache_0_internal_tensor_assign_51_begin_mask_0, end = concat_453, end_mask = model_model_kv_cache_0_internal_tensor_assign_51_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_51_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_51_stride_0, update = key_states_253, x = coreml_update_state_105)[name = string("model_model_kv_cache_0_internal_tensor_assign_51_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_51_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_106_write_state")]; tensor coreml_update_state_106 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_106")]; tensor expand_dims_306 = const()[name = string("expand_dims_306"), val = tensor([53])]; tensor expand_dims_307 = const()[name = string("expand_dims_307"), val = tensor([0])]; tensor expand_dims_309 = const()[name = string("expand_dims_309"), val = tensor([0])]; tensor expand_dims_310 = const()[name = string("expand_dims_310"), val = tensor([54])]; int32 concat_456_axis_0 = const()[name = string("concat_456_axis_0"), val = int32(0)]; bool concat_456_interleave_0 = const()[name = string("concat_456_interleave_0"), val = bool(false)]; tensor concat_456 = concat(axis = concat_456_axis_0, interleave = concat_456_interleave_0, values = (expand_dims_306, expand_dims_307, current_pos, expand_dims_309))[name = string("concat_456")]; tensor concat_457_values1_0 = const()[name = string("concat_457_values1_0"), val = tensor([0])]; tensor concat_457_values3_0 = const()[name = string("concat_457_values3_0"), val = tensor([0])]; int32 concat_457_axis_0 = const()[name = string("concat_457_axis_0"), val = int32(0)]; bool concat_457_interleave_0 = const()[name = string("concat_457_interleave_0"), val = bool(false)]; tensor concat_457 = concat(axis = concat_457_axis_0, interleave = concat_457_interleave_0, values = (expand_dims_310, concat_457_values1_0, var_1760, concat_457_values3_0))[name = string("concat_457")]; tensor model_model_kv_cache_0_internal_tensor_assign_52_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_52_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_52_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_52_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_52_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_52_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_52_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_52_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_203 = transpose(perm = var_14435, x = var_14430)[name = string("transpose_23")]; tensor model_model_kv_cache_0_internal_tensor_assign_52_cast_fp16 = slice_update(begin = concat_456, begin_mask = model_model_kv_cache_0_internal_tensor_assign_52_begin_mask_0, end = concat_457, end_mask = model_model_kv_cache_0_internal_tensor_assign_52_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_52_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_52_stride_0, update = value_states_203, x = coreml_update_state_106)[name = string("model_model_kv_cache_0_internal_tensor_assign_52_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_52_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_107_write_state")]; tensor coreml_update_state_107 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_107")]; tensor var_14609_begin_0 = const()[name = string("op_14609_begin_0"), val = tensor([25, 0, 0, 0])]; tensor var_14609_end_0 = const()[name = string("op_14609_end_0"), val = tensor([26, 8, 1024, 128])]; tensor var_14609_end_mask_0 = const()[name = string("op_14609_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_14609_cast_fp16 = slice_by_index(begin = var_14609_begin_0, end = var_14609_end_0, end_mask = var_14609_end_mask_0, x = coreml_update_state_107)[name = string("op_14609_cast_fp16")]; tensor K_layer_cache_51_axes_0 = const()[name = string("K_layer_cache_51_axes_0"), val = tensor([0])]; tensor K_layer_cache_51_cast_fp16 = squeeze(axes = K_layer_cache_51_axes_0, x = var_14609_cast_fp16)[name = string("K_layer_cache_51_cast_fp16")]; tensor var_14616_begin_0 = const()[name = string("op_14616_begin_0"), val = tensor([53, 0, 0, 0])]; tensor var_14616_end_0 = const()[name = string("op_14616_end_0"), val = tensor([54, 8, 1024, 128])]; tensor var_14616_end_mask_0 = const()[name = string("op_14616_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_14616_cast_fp16 = slice_by_index(begin = var_14616_begin_0, end = var_14616_end_0, end_mask = var_14616_end_mask_0, x = coreml_update_state_107)[name = string("op_14616_cast_fp16")]; tensor V_layer_cache_51_axes_0 = const()[name = string("V_layer_cache_51_axes_0"), val = tensor([0])]; tensor V_layer_cache_51_cast_fp16 = squeeze(axes = V_layer_cache_51_axes_0, x = var_14616_cast_fp16)[name = string("V_layer_cache_51_cast_fp16")]; tensor x_507_axes_0 = const()[name = string("x_507_axes_0"), val = tensor([1])]; tensor x_507_cast_fp16 = expand_dims(axes = x_507_axes_0, x = K_layer_cache_51_cast_fp16)[name = string("x_507_cast_fp16")]; tensor var_14645 = const()[name = string("op_14645"), val = tensor([1, 2, 1, 1])]; tensor x_509_cast_fp16 = tile(reps = var_14645, x = x_507_cast_fp16)[name = string("x_509_cast_fp16")]; tensor var_14657 = const()[name = string("op_14657"), val = tensor([1, -1, 1024, 128])]; tensor key_states_257_cast_fp16 = reshape(shape = var_14657, x = x_509_cast_fp16)[name = string("key_states_257_cast_fp16")]; tensor x_513_axes_0 = const()[name = string("x_513_axes_0"), val = tensor([1])]; tensor x_513_cast_fp16 = expand_dims(axes = x_513_axes_0, x = V_layer_cache_51_cast_fp16)[name = string("x_513_cast_fp16")]; tensor var_14665 = const()[name = string("op_14665"), val = tensor([1, 2, 1, 1])]; tensor x_515_cast_fp16 = tile(reps = var_14665, x = x_513_cast_fp16)[name = string("x_515_cast_fp16")]; bool var_14692_transpose_x_0 = const()[name = string("op_14692_transpose_x_0"), val = bool(false)]; bool var_14692_transpose_y_0 = const()[name = string("op_14692_transpose_y_0"), val = bool(true)]; tensor var_14692 = matmul(transpose_x = var_14692_transpose_x_0, transpose_y = var_14692_transpose_y_0, x = query_states_203, y = key_states_257_cast_fp16)[name = string("op_14692")]; fp16 var_14693_to_fp16 = const()[name = string("op_14693_to_fp16"), val = fp16(0x1.6ap-4)]; tensor attn_weights_101_cast_fp16 = mul(x = var_14692, y = var_14693_to_fp16)[name = string("attn_weights_101_cast_fp16")]; tensor attn_weights_103_cast_fp16 = add(x = attn_weights_101_cast_fp16, y = causal_mask)[name = string("attn_weights_103_cast_fp16")]; int32 var_14728 = const()[name = string("op_14728"), val = int32(-1)]; tensor var_14730_cast_fp16 = softmax(axis = var_14728, x = attn_weights_103_cast_fp16)[name = string("op_14730_cast_fp16")]; tensor concat_462 = const()[name = string("concat_462"), val = tensor([16, 64, 1024])]; tensor reshape_75_cast_fp16 = reshape(shape = concat_462, x = var_14730_cast_fp16)[name = string("reshape_75_cast_fp16")]; tensor concat_463 = const()[name = string("concat_463"), val = tensor([16, 1024, 128])]; tensor reshape_76_cast_fp16 = reshape(shape = concat_463, x = x_515_cast_fp16)[name = string("reshape_76_cast_fp16")]; bool matmul_25_transpose_x_0 = const()[name = string("matmul_25_transpose_x_0"), val = bool(false)]; bool matmul_25_transpose_y_0 = const()[name = string("matmul_25_transpose_y_0"), val = bool(false)]; tensor matmul_25_cast_fp16 = matmul(transpose_x = matmul_25_transpose_x_0, transpose_y = matmul_25_transpose_y_0, x = reshape_75_cast_fp16, y = reshape_76_cast_fp16)[name = string("matmul_25_cast_fp16")]; tensor concat_467 = const()[name = string("concat_467"), val = tensor([1, 16, 64, 128])]; tensor reshape_77_cast_fp16 = reshape(shape = concat_467, x = matmul_25_cast_fp16)[name = string("reshape_77_cast_fp16")]; tensor var_14742_perm_0 = const()[name = string("op_14742_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_14761 = const()[name = string("op_14761"), val = tensor([1, 64, 2048])]; tensor var_14742_cast_fp16 = transpose(perm = var_14742_perm_0, x = reshape_77_cast_fp16)[name = string("transpose_22")]; tensor attn_output_255_cast_fp16 = reshape(shape = var_14761, x = var_14742_cast_fp16)[name = string("attn_output_255_cast_fp16")]; tensor var_14766 = const()[name = string("op_14766"), val = tensor([0, 2, 1])]; string var_14782_pad_type_0 = const()[name = string("op_14782_pad_type_0"), val = string("valid")]; int32 var_14782_groups_0 = const()[name = string("op_14782_groups_0"), val = int32(1)]; tensor var_14782_strides_0 = const()[name = string("op_14782_strides_0"), val = tensor([1])]; tensor var_14782_pad_0 = const()[name = string("op_14782_pad_0"), val = tensor([0, 0])]; tensor var_14782_dilations_0 = const()[name = string("op_14782_dilations_0"), val = tensor([1])]; tensor squeeze_25_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1090412288))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1093558080))))[name = string("squeeze_25_cast_fp16_to_fp32_to_fp16_palettized")]; tensor var_14767_cast_fp16 = transpose(perm = var_14766, x = attn_output_255_cast_fp16)[name = string("transpose_21")]; tensor var_14782_cast_fp16 = conv(dilations = var_14782_dilations_0, groups = var_14782_groups_0, pad = var_14782_pad_0, pad_type = var_14782_pad_type_0, strides = var_14782_strides_0, weight = squeeze_25_cast_fp16_to_fp32_to_fp16_palettized, x = var_14767_cast_fp16)[name = string("op_14782_cast_fp16")]; tensor var_14786 = const()[name = string("op_14786"), val = tensor([0, 2, 1])]; tensor attn_output_259_cast_fp16 = transpose(perm = var_14786, x = var_14782_cast_fp16)[name = string("transpose_20")]; tensor hidden_states_155_cast_fp16 = add(x = hidden_states_151_cast_fp16, y = attn_output_259_cast_fp16)[name = string("hidden_states_155_cast_fp16")]; tensor mean_207_axes_0 = const()[name = string("mean_207_axes_0"), val = tensor([-1])]; bool mean_207_keep_dims_0 = const()[name = string("mean_207_keep_dims_0"), val = bool(true)]; tensor mean_207_cast_fp16 = reduce_mean(axes = mean_207_axes_0, keep_dims = mean_207_keep_dims_0, x = hidden_states_155_cast_fp16)[name = string("mean_207_cast_fp16")]; tensor input_461_cast_fp16 = sub(x = hidden_states_155_cast_fp16, y = mean_207_cast_fp16)[name = string("input_461_cast_fp16")]; tensor var_14805_axes_0 = const()[name = string("op_14805_axes_0"), val = tensor([-1])]; tensor model_model_layers_25_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_25_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1093590912)))]; fp16 var_14793_to_fp16 = const()[name = string("op_14793_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_14805_cast_fp16 = layer_norm(axes = var_14805_axes_0, epsilon = var_14793_to_fp16, gamma = model_model_layers_25_post_attention_layernorm_weight_to_fp16, x = input_461_cast_fp16)[name = string("op_14805_cast_fp16")]; tensor var_14819 = const()[name = string("op_14819"), val = tensor([0, 2, 1])]; tensor input_463_axes_0 = const()[name = string("input_463_axes_0"), val = tensor([2])]; tensor var_14820 = transpose(perm = var_14819, x = var_14805_cast_fp16)[name = string("transpose_19")]; tensor input_463 = expand_dims(axes = input_463_axes_0, x = var_14820)[name = string("input_463")]; string input_465_pad_type_0 = const()[name = string("input_465_pad_type_0"), val = string("valid")]; tensor input_465_strides_0 = const()[name = string("input_465_strides_0"), val = tensor([1, 1])]; tensor input_465_pad_0 = const()[name = string("input_465_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_465_dilations_0 = const()[name = string("input_465_dilations_0"), val = tensor([1, 1])]; int32 input_465_groups_0 = const()[name = string("input_465_groups_0"), val = int32(1)]; tensor input_465 = conv(dilations = input_465_dilations_0, groups = input_465_groups_0, pad = input_465_pad_0, pad_type = input_465_pad_type_0, strides = input_465_strides_0, weight = model_model_layers_25_mlp_gate_proj_weight_palettized, x = input_463)[name = string("input_465")]; string b_51_pad_type_0 = const()[name = string("b_51_pad_type_0"), val = string("valid")]; tensor b_51_strides_0 = const()[name = string("b_51_strides_0"), val = tensor([1, 1])]; tensor b_51_pad_0 = const()[name = string("b_51_pad_0"), val = tensor([0, 0, 0, 0])]; tensor b_51_dilations_0 = const()[name = string("b_51_dilations_0"), val = tensor([1, 1])]; int32 b_51_groups_0 = const()[name = string("b_51_groups_0"), val = int32(1)]; tensor b_51 = conv(dilations = b_51_dilations_0, groups = b_51_groups_0, pad = b_51_pad_0, pad_type = b_51_pad_type_0, strides = b_51_strides_0, weight = model_model_layers_25_mlp_up_proj_weight_palettized, x = input_463)[name = string("b_51")]; tensor c_51 = silu(x = input_465)[name = string("c_51")]; tensor input_467 = mul(x = c_51, y = b_51)[name = string("input_467")]; string e_51_pad_type_0 = const()[name = string("e_51_pad_type_0"), val = string("valid")]; tensor e_51_strides_0 = const()[name = string("e_51_strides_0"), val = tensor([1, 1])]; tensor e_51_pad_0 = const()[name = string("e_51_pad_0"), val = tensor([0, 0, 0, 0])]; tensor e_51_dilations_0 = const()[name = string("e_51_dilations_0"), val = tensor([1, 1])]; int32 e_51_groups_0 = const()[name = string("e_51_groups_0"), val = int32(1)]; tensor e_51 = conv(dilations = e_51_dilations_0, groups = e_51_groups_0, pad = e_51_pad_0, pad_type = e_51_pad_type_0, strides = e_51_strides_0, weight = model_model_layers_25_mlp_down_proj_weight_palettized, x = input_467)[name = string("e_51")]; tensor var_14842_axes_0 = const()[name = string("op_14842_axes_0"), val = tensor([2])]; tensor var_14842 = squeeze(axes = var_14842_axes_0, x = e_51)[name = string("op_14842")]; tensor var_14843 = const()[name = string("op_14843"), val = tensor([0, 2, 1])]; tensor var_14844 = transpose(perm = var_14843, x = var_14842)[name = string("transpose_18")]; tensor hidden_states_157_cast_fp16 = add(x = hidden_states_155_cast_fp16, y = var_14844)[name = string("hidden_states_157_cast_fp16")]; tensor mean_209_axes_0 = const()[name = string("mean_209_axes_0"), val = tensor([-1])]; bool mean_209_keep_dims_0 = const()[name = string("mean_209_keep_dims_0"), val = bool(true)]; tensor mean_209_cast_fp16 = reduce_mean(axes = mean_209_axes_0, keep_dims = mean_209_keep_dims_0, x = hidden_states_157_cast_fp16)[name = string("mean_209_cast_fp16")]; tensor input_469_cast_fp16 = sub(x = hidden_states_157_cast_fp16, y = mean_209_cast_fp16)[name = string("input_469_cast_fp16")]; tensor var_14862_axes_0 = const()[name = string("op_14862_axes_0"), val = tensor([-1])]; tensor model_model_layers_26_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_26_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1093595072)))]; fp16 var_14850_to_fp16 = const()[name = string("op_14850_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_14862_cast_fp16 = layer_norm(axes = var_14862_axes_0, epsilon = var_14850_to_fp16, gamma = model_model_layers_26_input_layernorm_weight_to_fp16, x = input_469_cast_fp16)[name = string("op_14862_cast_fp16")]; tensor var_14874 = const()[name = string("op_14874"), val = tensor([0, 2, 1])]; tensor var_14877_axes_0 = const()[name = string("op_14877_axes_0"), val = tensor([2])]; tensor var_14875 = transpose(perm = var_14874, x = var_14862_cast_fp16)[name = string("transpose_17")]; tensor var_14877 = expand_dims(axes = var_14877_axes_0, x = var_14875)[name = string("op_14877")]; string query_states_209_pad_type_0 = const()[name = string("query_states_209_pad_type_0"), val = string("valid")]; tensor query_states_209_strides_0 = const()[name = string("query_states_209_strides_0"), val = tensor([1, 1])]; tensor query_states_209_pad_0 = const()[name = string("query_states_209_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_209_dilations_0 = const()[name = string("query_states_209_dilations_0"), val = tensor([1, 1])]; int32 query_states_209_groups_0 = const()[name = string("query_states_209_groups_0"), val = int32(1)]; tensor query_states_209 = conv(dilations = query_states_209_dilations_0, groups = query_states_209_groups_0, pad = query_states_209_pad_0, pad_type = query_states_209_pad_type_0, strides = query_states_209_strides_0, weight = model_model_layers_26_self_attn_q_proj_weight_palettized, x = var_14877)[name = string("query_states_209")]; string key_states_261_pad_type_0 = const()[name = string("key_states_261_pad_type_0"), val = string("valid")]; tensor key_states_261_strides_0 = const()[name = string("key_states_261_strides_0"), val = tensor([1, 1])]; tensor key_states_261_pad_0 = const()[name = string("key_states_261_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_261_dilations_0 = const()[name = string("key_states_261_dilations_0"), val = tensor([1, 1])]; int32 key_states_261_groups_0 = const()[name = string("key_states_261_groups_0"), val = int32(1)]; tensor key_states_261 = conv(dilations = key_states_261_dilations_0, groups = key_states_261_groups_0, pad = key_states_261_pad_0, pad_type = key_states_261_pad_type_0, strides = key_states_261_strides_0, weight = model_model_layers_26_self_attn_k_proj_weight_palettized, x = var_14877)[name = string("key_states_261")]; string value_states_209_pad_type_0 = const()[name = string("value_states_209_pad_type_0"), val = string("valid")]; tensor value_states_209_strides_0 = const()[name = string("value_states_209_strides_0"), val = tensor([1, 1])]; tensor value_states_209_pad_0 = const()[name = string("value_states_209_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_209_dilations_0 = const()[name = string("value_states_209_dilations_0"), val = tensor([1, 1])]; int32 value_states_209_groups_0 = const()[name = string("value_states_209_groups_0"), val = int32(1)]; tensor value_states_209 = conv(dilations = value_states_209_dilations_0, groups = value_states_209_groups_0, pad = value_states_209_pad_0, pad_type = value_states_209_pad_type_0, strides = value_states_209_strides_0, weight = model_model_layers_26_self_attn_v_proj_weight_palettized, x = var_14877)[name = string("value_states_209")]; tensor var_14919 = const()[name = string("op_14919"), val = tensor([1, 16, 128, 64])]; tensor var_14920 = reshape(shape = var_14919, x = query_states_209)[name = string("op_14920")]; tensor var_14925 = const()[name = string("op_14925"), val = tensor([0, 1, 3, 2])]; tensor var_14930 = const()[name = string("op_14930"), val = tensor([1, 8, 128, 64])]; tensor var_14931 = reshape(shape = var_14930, x = key_states_261)[name = string("op_14931")]; tensor var_14936 = const()[name = string("op_14936"), val = tensor([0, 1, 3, 2])]; tensor var_14941 = const()[name = string("op_14941"), val = tensor([1, 8, 128, 64])]; tensor var_14942 = reshape(shape = var_14941, x = value_states_209)[name = string("op_14942")]; tensor var_14947 = const()[name = string("op_14947"), val = tensor([0, 1, 3, 2])]; tensor mean_211_axes_0 = const()[name = string("mean_211_axes_0"), val = tensor([-1])]; bool mean_211_keep_dims_0 = const()[name = string("mean_211_keep_dims_0"), val = bool(true)]; tensor x_521 = transpose(perm = var_14925, x = var_14920)[name = string("transpose_16")]; tensor mean_211 = reduce_mean(axes = mean_211_axes_0, keep_dims = mean_211_keep_dims_0, x = x_521)[name = string("mean_211")]; tensor input_473 = sub(x = x_521, y = mean_211)[name = string("input_473")]; tensor var_14964_axes_0 = const()[name = string("op_14964_axes_0"), val = tensor([-1])]; tensor model_model_layers_26_self_attn_q_norm_weight_to_fp16 = const()[name = string("model_model_layers_26_self_attn_q_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1093599232)))]; fp16 var_14952_to_fp16 = const()[name = string("op_14952_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_14964_cast_fp16 = layer_norm(axes = var_14964_axes_0, epsilon = var_14952_to_fp16, gamma = model_model_layers_26_self_attn_q_norm_weight_to_fp16, x = input_473)[name = string("op_14964_cast_fp16")]; tensor mean_213_axes_0 = const()[name = string("mean_213_axes_0"), val = tensor([-1])]; bool mean_213_keep_dims_0 = const()[name = string("mean_213_keep_dims_0"), val = bool(true)]; tensor x_523 = transpose(perm = var_14936, x = var_14931)[name = string("transpose_15")]; tensor mean_213 = reduce_mean(axes = mean_213_axes_0, keep_dims = mean_213_keep_dims_0, x = x_523)[name = string("mean_213")]; tensor input_475 = sub(x = x_523, y = mean_213)[name = string("input_475")]; tensor var_14982_axes_0 = const()[name = string("op_14982_axes_0"), val = tensor([-1])]; tensor model_model_layers_26_self_attn_k_norm_weight_to_fp16 = const()[name = string("model_model_layers_26_self_attn_k_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1093599552)))]; fp16 var_14970_to_fp16 = const()[name = string("op_14970_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_14982_cast_fp16 = layer_norm(axes = var_14982_axes_0, epsilon = var_14970_to_fp16, gamma = model_model_layers_26_self_attn_k_norm_weight_to_fp16, x = input_475)[name = string("op_14982_cast_fp16")]; tensor var_14997 = mul(x = var_14964_cast_fp16, y = cos_5)[name = string("op_14997")]; tensor x1_105_begin_0 = const()[name = string("x1_105_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_105_end_0 = const()[name = string("x1_105_end_0"), val = tensor([1, 16, 64, 64])]; tensor x1_105_end_mask_0 = const()[name = string("x1_105_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_105 = slice_by_index(begin = x1_105_begin_0, end = x1_105_end_0, end_mask = x1_105_end_mask_0, x = var_14964_cast_fp16)[name = string("x1_105")]; tensor x2_105_begin_0 = const()[name = string("x2_105_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_105_end_0 = const()[name = string("x2_105_end_0"), val = tensor([1, 16, 64, 128])]; tensor x2_105_end_mask_0 = const()[name = string("x2_105_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_105 = slice_by_index(begin = x2_105_begin_0, end = x2_105_end_0, end_mask = x2_105_end_mask_0, x = var_14964_cast_fp16)[name = string("x2_105")]; fp16 const_579_promoted = const()[name = string("const_579_promoted"), val = fp16(-0x1p+0)]; tensor var_15018 = mul(x = x2_105, y = const_579_promoted)[name = string("op_15018")]; int32 var_15020 = const()[name = string("op_15020"), val = int32(-1)]; bool var_15021_interleave_0 = const()[name = string("op_15021_interleave_0"), val = bool(false)]; tensor var_15021 = concat(axis = var_15020, interleave = var_15021_interleave_0, values = (var_15018, x1_105))[name = string("op_15021")]; tensor var_15022 = mul(x = var_15021, y = sin_5)[name = string("op_15022")]; tensor query_states_211 = add(x = var_14997, y = var_15022)[name = string("query_states_211")]; tensor var_15025 = mul(x = var_14982_cast_fp16, y = cos_5)[name = string("op_15025")]; tensor x1_107_begin_0 = const()[name = string("x1_107_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_107_end_0 = const()[name = string("x1_107_end_0"), val = tensor([1, 8, 64, 64])]; tensor x1_107_end_mask_0 = const()[name = string("x1_107_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_107 = slice_by_index(begin = x1_107_begin_0, end = x1_107_end_0, end_mask = x1_107_end_mask_0, x = var_14982_cast_fp16)[name = string("x1_107")]; tensor x2_107_begin_0 = const()[name = string("x2_107_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_107_end_0 = const()[name = string("x2_107_end_0"), val = tensor([1, 8, 64, 128])]; tensor x2_107_end_mask_0 = const()[name = string("x2_107_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_107 = slice_by_index(begin = x2_107_begin_0, end = x2_107_end_0, end_mask = x2_107_end_mask_0, x = var_14982_cast_fp16)[name = string("x2_107")]; fp16 const_582_promoted = const()[name = string("const_582_promoted"), val = fp16(-0x1p+0)]; tensor var_15046 = mul(x = x2_107, y = const_582_promoted)[name = string("op_15046")]; int32 var_15048 = const()[name = string("op_15048"), val = int32(-1)]; bool var_15049_interleave_0 = const()[name = string("op_15049_interleave_0"), val = bool(false)]; tensor var_15049 = concat(axis = var_15048, interleave = var_15049_interleave_0, values = (var_15046, x1_107))[name = string("op_15049")]; tensor var_15050 = mul(x = var_15049, y = sin_5)[name = string("op_15050")]; tensor key_states_263 = add(x = var_15025, y = var_15050)[name = string("key_states_263")]; tensor expand_dims_312 = const()[name = string("expand_dims_312"), val = tensor([26])]; tensor expand_dims_313 = const()[name = string("expand_dims_313"), val = tensor([0])]; tensor expand_dims_315 = const()[name = string("expand_dims_315"), val = tensor([0])]; tensor expand_dims_316 = const()[name = string("expand_dims_316"), val = tensor([27])]; int32 concat_470_axis_0 = const()[name = string("concat_470_axis_0"), val = int32(0)]; bool concat_470_interleave_0 = const()[name = string("concat_470_interleave_0"), val = bool(false)]; tensor concat_470 = concat(axis = concat_470_axis_0, interleave = concat_470_interleave_0, values = (expand_dims_312, expand_dims_313, current_pos, expand_dims_315))[name = string("concat_470")]; tensor concat_471_values1_0 = const()[name = string("concat_471_values1_0"), val = tensor([0])]; tensor concat_471_values3_0 = const()[name = string("concat_471_values3_0"), val = tensor([0])]; int32 concat_471_axis_0 = const()[name = string("concat_471_axis_0"), val = int32(0)]; bool concat_471_interleave_0 = const()[name = string("concat_471_interleave_0"), val = bool(false)]; tensor concat_471 = concat(axis = concat_471_axis_0, interleave = concat_471_interleave_0, values = (expand_dims_316, concat_471_values1_0, var_1760, concat_471_values3_0))[name = string("concat_471")]; tensor model_model_kv_cache_0_internal_tensor_assign_53_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_53_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_53_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_53_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_53_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_53_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_53_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_53_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_53_cast_fp16 = slice_update(begin = concat_470, begin_mask = model_model_kv_cache_0_internal_tensor_assign_53_begin_mask_0, end = concat_471, end_mask = model_model_kv_cache_0_internal_tensor_assign_53_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_53_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_53_stride_0, update = key_states_263, x = coreml_update_state_107)[name = string("model_model_kv_cache_0_internal_tensor_assign_53_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_53_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_108_write_state")]; tensor coreml_update_state_108 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_108")]; tensor expand_dims_318 = const()[name = string("expand_dims_318"), val = tensor([54])]; tensor expand_dims_319 = const()[name = string("expand_dims_319"), val = tensor([0])]; tensor expand_dims_321 = const()[name = string("expand_dims_321"), val = tensor([0])]; tensor expand_dims_322 = const()[name = string("expand_dims_322"), val = tensor([55])]; int32 concat_474_axis_0 = const()[name = string("concat_474_axis_0"), val = int32(0)]; bool concat_474_interleave_0 = const()[name = string("concat_474_interleave_0"), val = bool(false)]; tensor concat_474 = concat(axis = concat_474_axis_0, interleave = concat_474_interleave_0, values = (expand_dims_318, expand_dims_319, current_pos, expand_dims_321))[name = string("concat_474")]; tensor concat_475_values1_0 = const()[name = string("concat_475_values1_0"), val = tensor([0])]; tensor concat_475_values3_0 = const()[name = string("concat_475_values3_0"), val = tensor([0])]; int32 concat_475_axis_0 = const()[name = string("concat_475_axis_0"), val = int32(0)]; bool concat_475_interleave_0 = const()[name = string("concat_475_interleave_0"), val = bool(false)]; tensor concat_475 = concat(axis = concat_475_axis_0, interleave = concat_475_interleave_0, values = (expand_dims_322, concat_475_values1_0, var_1760, concat_475_values3_0))[name = string("concat_475")]; tensor model_model_kv_cache_0_internal_tensor_assign_54_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_54_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_54_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_54_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_54_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_54_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_54_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_54_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_211 = transpose(perm = var_14947, x = var_14942)[name = string("transpose_14")]; tensor model_model_kv_cache_0_internal_tensor_assign_54_cast_fp16 = slice_update(begin = concat_474, begin_mask = model_model_kv_cache_0_internal_tensor_assign_54_begin_mask_0, end = concat_475, end_mask = model_model_kv_cache_0_internal_tensor_assign_54_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_54_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_54_stride_0, update = value_states_211, x = coreml_update_state_108)[name = string("model_model_kv_cache_0_internal_tensor_assign_54_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_54_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_109_write_state")]; tensor coreml_update_state_109 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_109")]; tensor var_15121_begin_0 = const()[name = string("op_15121_begin_0"), val = tensor([26, 0, 0, 0])]; tensor var_15121_end_0 = const()[name = string("op_15121_end_0"), val = tensor([27, 8, 1024, 128])]; tensor var_15121_end_mask_0 = const()[name = string("op_15121_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_15121_cast_fp16 = slice_by_index(begin = var_15121_begin_0, end = var_15121_end_0, end_mask = var_15121_end_mask_0, x = coreml_update_state_109)[name = string("op_15121_cast_fp16")]; tensor K_layer_cache_53_axes_0 = const()[name = string("K_layer_cache_53_axes_0"), val = tensor([0])]; tensor K_layer_cache_53_cast_fp16 = squeeze(axes = K_layer_cache_53_axes_0, x = var_15121_cast_fp16)[name = string("K_layer_cache_53_cast_fp16")]; tensor var_15128_begin_0 = const()[name = string("op_15128_begin_0"), val = tensor([54, 0, 0, 0])]; tensor var_15128_end_0 = const()[name = string("op_15128_end_0"), val = tensor([55, 8, 1024, 128])]; tensor var_15128_end_mask_0 = const()[name = string("op_15128_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_15128_cast_fp16 = slice_by_index(begin = var_15128_begin_0, end = var_15128_end_0, end_mask = var_15128_end_mask_0, x = coreml_update_state_109)[name = string("op_15128_cast_fp16")]; tensor V_layer_cache_53_axes_0 = const()[name = string("V_layer_cache_53_axes_0"), val = tensor([0])]; tensor V_layer_cache_53_cast_fp16 = squeeze(axes = V_layer_cache_53_axes_0, x = var_15128_cast_fp16)[name = string("V_layer_cache_53_cast_fp16")]; tensor x_527_axes_0 = const()[name = string("x_527_axes_0"), val = tensor([1])]; tensor x_527_cast_fp16 = expand_dims(axes = x_527_axes_0, x = K_layer_cache_53_cast_fp16)[name = string("x_527_cast_fp16")]; tensor var_15157 = const()[name = string("op_15157"), val = tensor([1, 2, 1, 1])]; tensor x_529_cast_fp16 = tile(reps = var_15157, x = x_527_cast_fp16)[name = string("x_529_cast_fp16")]; tensor var_15169 = const()[name = string("op_15169"), val = tensor([1, -1, 1024, 128])]; tensor key_states_267_cast_fp16 = reshape(shape = var_15169, x = x_529_cast_fp16)[name = string("key_states_267_cast_fp16")]; tensor x_533_axes_0 = const()[name = string("x_533_axes_0"), val = tensor([1])]; tensor x_533_cast_fp16 = expand_dims(axes = x_533_axes_0, x = V_layer_cache_53_cast_fp16)[name = string("x_533_cast_fp16")]; tensor var_15177 = const()[name = string("op_15177"), val = tensor([1, 2, 1, 1])]; tensor x_535_cast_fp16 = tile(reps = var_15177, x = x_533_cast_fp16)[name = string("x_535_cast_fp16")]; bool var_15204_transpose_x_0 = const()[name = string("op_15204_transpose_x_0"), val = bool(false)]; bool var_15204_transpose_y_0 = const()[name = string("op_15204_transpose_y_0"), val = bool(true)]; tensor var_15204 = matmul(transpose_x = var_15204_transpose_x_0, transpose_y = var_15204_transpose_y_0, x = query_states_211, y = key_states_267_cast_fp16)[name = string("op_15204")]; fp16 var_15205_to_fp16 = const()[name = string("op_15205_to_fp16"), val = fp16(0x1.6ap-4)]; tensor attn_weights_105_cast_fp16 = mul(x = var_15204, y = var_15205_to_fp16)[name = string("attn_weights_105_cast_fp16")]; tensor attn_weights_107_cast_fp16 = add(x = attn_weights_105_cast_fp16, y = causal_mask)[name = string("attn_weights_107_cast_fp16")]; int32 var_15240 = const()[name = string("op_15240"), val = int32(-1)]; tensor var_15242_cast_fp16 = softmax(axis = var_15240, x = attn_weights_107_cast_fp16)[name = string("op_15242_cast_fp16")]; tensor concat_480 = const()[name = string("concat_480"), val = tensor([16, 64, 1024])]; tensor reshape_78_cast_fp16 = reshape(shape = concat_480, x = var_15242_cast_fp16)[name = string("reshape_78_cast_fp16")]; tensor concat_481 = const()[name = string("concat_481"), val = tensor([16, 1024, 128])]; tensor reshape_79_cast_fp16 = reshape(shape = concat_481, x = x_535_cast_fp16)[name = string("reshape_79_cast_fp16")]; bool matmul_26_transpose_x_0 = const()[name = string("matmul_26_transpose_x_0"), val = bool(false)]; bool matmul_26_transpose_y_0 = const()[name = string("matmul_26_transpose_y_0"), val = bool(false)]; tensor matmul_26_cast_fp16 = matmul(transpose_x = matmul_26_transpose_x_0, transpose_y = matmul_26_transpose_y_0, x = reshape_78_cast_fp16, y = reshape_79_cast_fp16)[name = string("matmul_26_cast_fp16")]; tensor concat_485 = const()[name = string("concat_485"), val = tensor([1, 16, 64, 128])]; tensor reshape_80_cast_fp16 = reshape(shape = concat_485, x = matmul_26_cast_fp16)[name = string("reshape_80_cast_fp16")]; tensor var_15254_perm_0 = const()[name = string("op_15254_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_15273 = const()[name = string("op_15273"), val = tensor([1, 64, 2048])]; tensor var_15254_cast_fp16 = transpose(perm = var_15254_perm_0, x = reshape_80_cast_fp16)[name = string("transpose_13")]; tensor attn_output_265_cast_fp16 = reshape(shape = var_15273, x = var_15254_cast_fp16)[name = string("attn_output_265_cast_fp16")]; tensor var_15278 = const()[name = string("op_15278"), val = tensor([0, 2, 1])]; string var_15294_pad_type_0 = const()[name = string("op_15294_pad_type_0"), val = string("valid")]; int32 var_15294_groups_0 = const()[name = string("op_15294_groups_0"), val = int32(1)]; tensor var_15294_strides_0 = const()[name = string("op_15294_strides_0"), val = tensor([1])]; tensor var_15294_pad_0 = const()[name = string("op_15294_pad_0"), val = tensor([0, 0])]; tensor var_15294_dilations_0 = const()[name = string("op_15294_dilations_0"), val = tensor([1])]; tensor squeeze_26_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1093599872))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1096745664))))[name = string("squeeze_26_cast_fp16_to_fp32_to_fp16_palettized")]; tensor var_15279_cast_fp16 = transpose(perm = var_15278, x = attn_output_265_cast_fp16)[name = string("transpose_12")]; tensor var_15294_cast_fp16 = conv(dilations = var_15294_dilations_0, groups = var_15294_groups_0, pad = var_15294_pad_0, pad_type = var_15294_pad_type_0, strides = var_15294_strides_0, weight = squeeze_26_cast_fp16_to_fp32_to_fp16_palettized, x = var_15279_cast_fp16)[name = string("op_15294_cast_fp16")]; tensor var_15298 = const()[name = string("op_15298"), val = tensor([0, 2, 1])]; tensor attn_output_269_cast_fp16 = transpose(perm = var_15298, x = var_15294_cast_fp16)[name = string("transpose_11")]; tensor hidden_states_161_cast_fp16 = add(x = hidden_states_157_cast_fp16, y = attn_output_269_cast_fp16)[name = string("hidden_states_161_cast_fp16")]; tensor mean_215_axes_0 = const()[name = string("mean_215_axes_0"), val = tensor([-1])]; bool mean_215_keep_dims_0 = const()[name = string("mean_215_keep_dims_0"), val = bool(true)]; tensor mean_215_cast_fp16 = reduce_mean(axes = mean_215_axes_0, keep_dims = mean_215_keep_dims_0, x = hidden_states_161_cast_fp16)[name = string("mean_215_cast_fp16")]; tensor input_479_cast_fp16 = sub(x = hidden_states_161_cast_fp16, y = mean_215_cast_fp16)[name = string("input_479_cast_fp16")]; tensor var_15317_axes_0 = const()[name = string("op_15317_axes_0"), val = tensor([-1])]; tensor model_model_layers_26_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_26_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1096778496)))]; fp16 var_15305_to_fp16 = const()[name = string("op_15305_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_15317_cast_fp16 = layer_norm(axes = var_15317_axes_0, epsilon = var_15305_to_fp16, gamma = model_model_layers_26_post_attention_layernorm_weight_to_fp16, x = input_479_cast_fp16)[name = string("op_15317_cast_fp16")]; tensor var_15331 = const()[name = string("op_15331"), val = tensor([0, 2, 1])]; tensor input_481_axes_0 = const()[name = string("input_481_axes_0"), val = tensor([2])]; tensor var_15332 = transpose(perm = var_15331, x = var_15317_cast_fp16)[name = string("transpose_10")]; tensor input_481 = expand_dims(axes = input_481_axes_0, x = var_15332)[name = string("input_481")]; string input_483_pad_type_0 = const()[name = string("input_483_pad_type_0"), val = string("valid")]; tensor input_483_strides_0 = const()[name = string("input_483_strides_0"), val = tensor([1, 1])]; tensor input_483_pad_0 = const()[name = string("input_483_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_483_dilations_0 = const()[name = string("input_483_dilations_0"), val = tensor([1, 1])]; int32 input_483_groups_0 = const()[name = string("input_483_groups_0"), val = int32(1)]; tensor input_483 = conv(dilations = input_483_dilations_0, groups = input_483_groups_0, pad = input_483_pad_0, pad_type = input_483_pad_type_0, strides = input_483_strides_0, weight = model_model_layers_26_mlp_gate_proj_weight_palettized, x = input_481)[name = string("input_483")]; string b_53_pad_type_0 = const()[name = string("b_53_pad_type_0"), val = string("valid")]; tensor b_53_strides_0 = const()[name = string("b_53_strides_0"), val = tensor([1, 1])]; tensor b_53_pad_0 = const()[name = string("b_53_pad_0"), val = tensor([0, 0, 0, 0])]; tensor b_53_dilations_0 = const()[name = string("b_53_dilations_0"), val = tensor([1, 1])]; int32 b_53_groups_0 = const()[name = string("b_53_groups_0"), val = int32(1)]; tensor b_53 = conv(dilations = b_53_dilations_0, groups = b_53_groups_0, pad = b_53_pad_0, pad_type = b_53_pad_type_0, strides = b_53_strides_0, weight = model_model_layers_26_mlp_up_proj_weight_palettized, x = input_481)[name = string("b_53")]; tensor c_53 = silu(x = input_483)[name = string("c_53")]; tensor input_485 = mul(x = c_53, y = b_53)[name = string("input_485")]; string e_53_pad_type_0 = const()[name = string("e_53_pad_type_0"), val = string("valid")]; tensor e_53_strides_0 = const()[name = string("e_53_strides_0"), val = tensor([1, 1])]; tensor e_53_pad_0 = const()[name = string("e_53_pad_0"), val = tensor([0, 0, 0, 0])]; tensor e_53_dilations_0 = const()[name = string("e_53_dilations_0"), val = tensor([1, 1])]; int32 e_53_groups_0 = const()[name = string("e_53_groups_0"), val = int32(1)]; tensor e_53 = conv(dilations = e_53_dilations_0, groups = e_53_groups_0, pad = e_53_pad_0, pad_type = e_53_pad_type_0, strides = e_53_strides_0, weight = model_model_layers_26_mlp_down_proj_weight_palettized, x = input_485)[name = string("e_53")]; tensor var_15354_axes_0 = const()[name = string("op_15354_axes_0"), val = tensor([2])]; tensor var_15354 = squeeze(axes = var_15354_axes_0, x = e_53)[name = string("op_15354")]; tensor var_15355 = const()[name = string("op_15355"), val = tensor([0, 2, 1])]; tensor var_15356 = transpose(perm = var_15355, x = var_15354)[name = string("transpose_9")]; tensor hidden_states_163_cast_fp16 = add(x = hidden_states_161_cast_fp16, y = var_15356)[name = string("hidden_states_163_cast_fp16")]; tensor mean_217_axes_0 = const()[name = string("mean_217_axes_0"), val = tensor([-1])]; bool mean_217_keep_dims_0 = const()[name = string("mean_217_keep_dims_0"), val = bool(true)]; tensor mean_217_cast_fp16 = reduce_mean(axes = mean_217_axes_0, keep_dims = mean_217_keep_dims_0, x = hidden_states_163_cast_fp16)[name = string("mean_217_cast_fp16")]; tensor input_487_cast_fp16 = sub(x = hidden_states_163_cast_fp16, y = mean_217_cast_fp16)[name = string("input_487_cast_fp16")]; tensor var_15374_axes_0 = const()[name = string("op_15374_axes_0"), val = tensor([-1])]; tensor model_model_layers_27_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_27_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1096782656)))]; fp16 var_15362_to_fp16 = const()[name = string("op_15362_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_15374_cast_fp16 = layer_norm(axes = var_15374_axes_0, epsilon = var_15362_to_fp16, gamma = model_model_layers_27_input_layernorm_weight_to_fp16, x = input_487_cast_fp16)[name = string("op_15374_cast_fp16")]; tensor var_15386 = const()[name = string("op_15386"), val = tensor([0, 2, 1])]; tensor var_15389_axes_0 = const()[name = string("op_15389_axes_0"), val = tensor([2])]; tensor var_15387 = transpose(perm = var_15386, x = var_15374_cast_fp16)[name = string("transpose_8")]; tensor var_15389 = expand_dims(axes = var_15389_axes_0, x = var_15387)[name = string("op_15389")]; string query_states_217_pad_type_0 = const()[name = string("query_states_217_pad_type_0"), val = string("valid")]; tensor query_states_217_strides_0 = const()[name = string("query_states_217_strides_0"), val = tensor([1, 1])]; tensor query_states_217_pad_0 = const()[name = string("query_states_217_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_217_dilations_0 = const()[name = string("query_states_217_dilations_0"), val = tensor([1, 1])]; int32 query_states_217_groups_0 = const()[name = string("query_states_217_groups_0"), val = int32(1)]; tensor query_states_217 = conv(dilations = query_states_217_dilations_0, groups = query_states_217_groups_0, pad = query_states_217_pad_0, pad_type = query_states_217_pad_type_0, strides = query_states_217_strides_0, weight = model_model_layers_27_self_attn_q_proj_weight_palettized, x = var_15389)[name = string("query_states_217")]; string key_states_271_pad_type_0 = const()[name = string("key_states_271_pad_type_0"), val = string("valid")]; tensor key_states_271_strides_0 = const()[name = string("key_states_271_strides_0"), val = tensor([1, 1])]; tensor key_states_271_pad_0 = const()[name = string("key_states_271_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_271_dilations_0 = const()[name = string("key_states_271_dilations_0"), val = tensor([1, 1])]; int32 key_states_271_groups_0 = const()[name = string("key_states_271_groups_0"), val = int32(1)]; tensor key_states_271 = conv(dilations = key_states_271_dilations_0, groups = key_states_271_groups_0, pad = key_states_271_pad_0, pad_type = key_states_271_pad_type_0, strides = key_states_271_strides_0, weight = model_model_layers_27_self_attn_k_proj_weight_palettized, x = var_15389)[name = string("key_states_271")]; string value_states_217_pad_type_0 = const()[name = string("value_states_217_pad_type_0"), val = string("valid")]; tensor value_states_217_strides_0 = const()[name = string("value_states_217_strides_0"), val = tensor([1, 1])]; tensor value_states_217_pad_0 = const()[name = string("value_states_217_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_217_dilations_0 = const()[name = string("value_states_217_dilations_0"), val = tensor([1, 1])]; int32 value_states_217_groups_0 = const()[name = string("value_states_217_groups_0"), val = int32(1)]; tensor value_states_217 = conv(dilations = value_states_217_dilations_0, groups = value_states_217_groups_0, pad = value_states_217_pad_0, pad_type = value_states_217_pad_type_0, strides = value_states_217_strides_0, weight = model_model_layers_27_self_attn_v_proj_weight_palettized, x = var_15389)[name = string("value_states_217")]; tensor var_15431 = const()[name = string("op_15431"), val = tensor([1, 16, 128, 64])]; tensor var_15432 = reshape(shape = var_15431, x = query_states_217)[name = string("op_15432")]; tensor var_15437 = const()[name = string("op_15437"), val = tensor([0, 1, 3, 2])]; tensor var_15442 = const()[name = string("op_15442"), val = tensor([1, 8, 128, 64])]; tensor var_15443 = reshape(shape = var_15442, x = key_states_271)[name = string("op_15443")]; tensor var_15448 = const()[name = string("op_15448"), val = tensor([0, 1, 3, 2])]; tensor var_15453 = const()[name = string("op_15453"), val = tensor([1, 8, 128, 64])]; tensor var_15454 = reshape(shape = var_15453, x = value_states_217)[name = string("op_15454")]; tensor var_15459 = const()[name = string("op_15459"), val = tensor([0, 1, 3, 2])]; tensor mean_219_axes_0 = const()[name = string("mean_219_axes_0"), val = tensor([-1])]; bool mean_219_keep_dims_0 = const()[name = string("mean_219_keep_dims_0"), val = bool(true)]; tensor x_541 = transpose(perm = var_15437, x = var_15432)[name = string("transpose_7")]; tensor mean_219 = reduce_mean(axes = mean_219_axes_0, keep_dims = mean_219_keep_dims_0, x = x_541)[name = string("mean_219")]; tensor input_491 = sub(x = x_541, y = mean_219)[name = string("input_491")]; tensor var_15476_axes_0 = const()[name = string("op_15476_axes_0"), val = tensor([-1])]; tensor model_model_layers_27_self_attn_q_norm_weight_to_fp16 = const()[name = string("model_model_layers_27_self_attn_q_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1096786816)))]; fp16 var_15464_to_fp16 = const()[name = string("op_15464_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_15476_cast_fp16 = layer_norm(axes = var_15476_axes_0, epsilon = var_15464_to_fp16, gamma = model_model_layers_27_self_attn_q_norm_weight_to_fp16, x = input_491)[name = string("op_15476_cast_fp16")]; tensor mean_221_axes_0 = const()[name = string("mean_221_axes_0"), val = tensor([-1])]; bool mean_221_keep_dims_0 = const()[name = string("mean_221_keep_dims_0"), val = bool(true)]; tensor x_543 = transpose(perm = var_15448, x = var_15443)[name = string("transpose_6")]; tensor mean_221 = reduce_mean(axes = mean_221_axes_0, keep_dims = mean_221_keep_dims_0, x = x_543)[name = string("mean_221")]; tensor input_493 = sub(x = x_543, y = mean_221)[name = string("input_493")]; tensor var_15494_axes_0 = const()[name = string("op_15494_axes_0"), val = tensor([-1])]; tensor model_model_layers_27_self_attn_k_norm_weight_to_fp16 = const()[name = string("model_model_layers_27_self_attn_k_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1096787136)))]; fp16 var_15482_to_fp16 = const()[name = string("op_15482_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_15494_cast_fp16 = layer_norm(axes = var_15494_axes_0, epsilon = var_15482_to_fp16, gamma = model_model_layers_27_self_attn_k_norm_weight_to_fp16, x = input_493)[name = string("op_15494_cast_fp16")]; tensor var_15509 = mul(x = var_15476_cast_fp16, y = cos_5)[name = string("op_15509")]; tensor x1_109_begin_0 = const()[name = string("x1_109_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_109_end_0 = const()[name = string("x1_109_end_0"), val = tensor([1, 16, 64, 64])]; tensor x1_109_end_mask_0 = const()[name = string("x1_109_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_109 = slice_by_index(begin = x1_109_begin_0, end = x1_109_end_0, end_mask = x1_109_end_mask_0, x = var_15476_cast_fp16)[name = string("x1_109")]; tensor x2_109_begin_0 = const()[name = string("x2_109_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_109_end_0 = const()[name = string("x2_109_end_0"), val = tensor([1, 16, 64, 128])]; tensor x2_109_end_mask_0 = const()[name = string("x2_109_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_109 = slice_by_index(begin = x2_109_begin_0, end = x2_109_end_0, end_mask = x2_109_end_mask_0, x = var_15476_cast_fp16)[name = string("x2_109")]; fp16 const_601_promoted = const()[name = string("const_601_promoted"), val = fp16(-0x1p+0)]; tensor var_15530 = mul(x = x2_109, y = const_601_promoted)[name = string("op_15530")]; int32 var_15532 = const()[name = string("op_15532"), val = int32(-1)]; bool var_15533_interleave_0 = const()[name = string("op_15533_interleave_0"), val = bool(false)]; tensor var_15533 = concat(axis = var_15532, interleave = var_15533_interleave_0, values = (var_15530, x1_109))[name = string("op_15533")]; tensor var_15534 = mul(x = var_15533, y = sin_5)[name = string("op_15534")]; tensor query_states_219 = add(x = var_15509, y = var_15534)[name = string("query_states_219")]; tensor var_15537 = mul(x = var_15494_cast_fp16, y = cos_5)[name = string("op_15537")]; tensor x1_begin_0 = const()[name = string("x1_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_end_0 = const()[name = string("x1_end_0"), val = tensor([1, 8, 64, 64])]; tensor x1_end_mask_0 = const()[name = string("x1_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1 = slice_by_index(begin = x1_begin_0, end = x1_end_0, end_mask = x1_end_mask_0, x = var_15494_cast_fp16)[name = string("x1")]; tensor x2_begin_0 = const()[name = string("x2_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_end_0 = const()[name = string("x2_end_0"), val = tensor([1, 8, 64, 128])]; tensor x2_end_mask_0 = const()[name = string("x2_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2 = slice_by_index(begin = x2_begin_0, end = x2_end_0, end_mask = x2_end_mask_0, x = var_15494_cast_fp16)[name = string("x2")]; fp16 const_604_promoted = const()[name = string("const_604_promoted"), val = fp16(-0x1p+0)]; tensor var_15558 = mul(x = x2, y = const_604_promoted)[name = string("op_15558")]; int32 var_15560 = const()[name = string("op_15560"), val = int32(-1)]; bool var_15561_interleave_0 = const()[name = string("op_15561_interleave_0"), val = bool(false)]; tensor var_15561 = concat(axis = var_15560, interleave = var_15561_interleave_0, values = (var_15558, x1))[name = string("op_15561")]; tensor var_15562 = mul(x = var_15561, y = sin_5)[name = string("op_15562")]; tensor key_states_273 = add(x = var_15537, y = var_15562)[name = string("key_states_273")]; tensor expand_dims_324 = const()[name = string("expand_dims_324"), val = tensor([27])]; tensor expand_dims_325 = const()[name = string("expand_dims_325"), val = tensor([0])]; tensor expand_dims_327 = const()[name = string("expand_dims_327"), val = tensor([0])]; tensor expand_dims_328 = const()[name = string("expand_dims_328"), val = tensor([28])]; int32 concat_488_axis_0 = const()[name = string("concat_488_axis_0"), val = int32(0)]; bool concat_488_interleave_0 = const()[name = string("concat_488_interleave_0"), val = bool(false)]; tensor concat_488 = concat(axis = concat_488_axis_0, interleave = concat_488_interleave_0, values = (expand_dims_324, expand_dims_325, current_pos, expand_dims_327))[name = string("concat_488")]; tensor concat_489_values1_0 = const()[name = string("concat_489_values1_0"), val = tensor([0])]; tensor concat_489_values3_0 = const()[name = string("concat_489_values3_0"), val = tensor([0])]; int32 concat_489_axis_0 = const()[name = string("concat_489_axis_0"), val = int32(0)]; bool concat_489_interleave_0 = const()[name = string("concat_489_interleave_0"), val = bool(false)]; tensor concat_489 = concat(axis = concat_489_axis_0, interleave = concat_489_interleave_0, values = (expand_dims_328, concat_489_values1_0, var_1760, concat_489_values3_0))[name = string("concat_489")]; tensor model_model_kv_cache_0_internal_tensor_assign_55_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_55_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_55_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_55_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_55_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_55_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_55_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_55_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_55_cast_fp16 = slice_update(begin = concat_488, begin_mask = model_model_kv_cache_0_internal_tensor_assign_55_begin_mask_0, end = concat_489, end_mask = model_model_kv_cache_0_internal_tensor_assign_55_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_55_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_55_stride_0, update = key_states_273, x = coreml_update_state_109)[name = string("model_model_kv_cache_0_internal_tensor_assign_55_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_55_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_110_write_state")]; tensor coreml_update_state_110 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_110")]; tensor expand_dims_330 = const()[name = string("expand_dims_330"), val = tensor([55])]; tensor expand_dims_331 = const()[name = string("expand_dims_331"), val = tensor([0])]; tensor expand_dims_333 = const()[name = string("expand_dims_333"), val = tensor([0])]; tensor expand_dims_334 = const()[name = string("expand_dims_334"), val = tensor([56])]; int32 concat_492_axis_0 = const()[name = string("concat_492_axis_0"), val = int32(0)]; bool concat_492_interleave_0 = const()[name = string("concat_492_interleave_0"), val = bool(false)]; tensor concat_492 = concat(axis = concat_492_axis_0, interleave = concat_492_interleave_0, values = (expand_dims_330, expand_dims_331, current_pos, expand_dims_333))[name = string("concat_492")]; tensor concat_493_values1_0 = const()[name = string("concat_493_values1_0"), val = tensor([0])]; tensor concat_493_values3_0 = const()[name = string("concat_493_values3_0"), val = tensor([0])]; int32 concat_493_axis_0 = const()[name = string("concat_493_axis_0"), val = int32(0)]; bool concat_493_interleave_0 = const()[name = string("concat_493_interleave_0"), val = bool(false)]; tensor concat_493 = concat(axis = concat_493_axis_0, interleave = concat_493_interleave_0, values = (expand_dims_334, concat_493_values1_0, var_1760, concat_493_values3_0))[name = string("concat_493")]; tensor model_model_kv_cache_0_internal_tensor_assign_56_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_56_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_56_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_56_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_56_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_56_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_56_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_56_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_219 = transpose(perm = var_15459, x = var_15454)[name = string("transpose_5")]; tensor model_model_kv_cache_0_internal_tensor_assign_56_cast_fp16 = slice_update(begin = concat_492, begin_mask = model_model_kv_cache_0_internal_tensor_assign_56_begin_mask_0, end = concat_493, end_mask = model_model_kv_cache_0_internal_tensor_assign_56_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_56_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_56_stride_0, update = value_states_219, x = coreml_update_state_110)[name = string("model_model_kv_cache_0_internal_tensor_assign_56_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_56_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_111_write_state")]; tensor coreml_update_state_111 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_111")]; tensor var_15633_begin_0 = const()[name = string("op_15633_begin_0"), val = tensor([27, 0, 0, 0])]; tensor var_15633_end_0 = const()[name = string("op_15633_end_0"), val = tensor([28, 8, 1024, 128])]; tensor var_15633_end_mask_0 = const()[name = string("op_15633_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_15633_cast_fp16 = slice_by_index(begin = var_15633_begin_0, end = var_15633_end_0, end_mask = var_15633_end_mask_0, x = coreml_update_state_111)[name = string("op_15633_cast_fp16")]; tensor K_layer_cache_axes_0 = const()[name = string("K_layer_cache_axes_0"), val = tensor([0])]; tensor K_layer_cache_cast_fp16 = squeeze(axes = K_layer_cache_axes_0, x = var_15633_cast_fp16)[name = string("K_layer_cache_cast_fp16")]; tensor var_15640_begin_0 = const()[name = string("op_15640_begin_0"), val = tensor([55, 0, 0, 0])]; tensor var_15640_end_0 = const()[name = string("op_15640_end_0"), val = tensor([1, 8, 1024, 128])]; tensor var_15640_end_mask_0 = const()[name = string("op_15640_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_15640_cast_fp16 = slice_by_index(begin = var_15640_begin_0, end = var_15640_end_0, end_mask = var_15640_end_mask_0, x = coreml_update_state_111)[name = string("op_15640_cast_fp16")]; tensor V_layer_cache_axes_0 = const()[name = string("V_layer_cache_axes_0"), val = tensor([0])]; tensor V_layer_cache_cast_fp16 = squeeze(axes = V_layer_cache_axes_0, x = var_15640_cast_fp16)[name = string("V_layer_cache_cast_fp16")]; tensor x_547_axes_0 = const()[name = string("x_547_axes_0"), val = tensor([1])]; tensor x_547_cast_fp16 = expand_dims(axes = x_547_axes_0, x = K_layer_cache_cast_fp16)[name = string("x_547_cast_fp16")]; tensor var_15669 = const()[name = string("op_15669"), val = tensor([1, 2, 1, 1])]; tensor x_549_cast_fp16 = tile(reps = var_15669, x = x_547_cast_fp16)[name = string("x_549_cast_fp16")]; tensor var_15681 = const()[name = string("op_15681"), val = tensor([1, -1, 1024, 128])]; tensor key_states_277_cast_fp16 = reshape(shape = var_15681, x = x_549_cast_fp16)[name = string("key_states_277_cast_fp16")]; tensor x_553_axes_0 = const()[name = string("x_553_axes_0"), val = tensor([1])]; tensor x_553_cast_fp16 = expand_dims(axes = x_553_axes_0, x = V_layer_cache_cast_fp16)[name = string("x_553_cast_fp16")]; tensor var_15689 = const()[name = string("op_15689"), val = tensor([1, 2, 1, 1])]; tensor x_555_cast_fp16 = tile(reps = var_15689, x = x_553_cast_fp16)[name = string("x_555_cast_fp16")]; bool var_15716_transpose_x_0 = const()[name = string("op_15716_transpose_x_0"), val = bool(false)]; bool var_15716_transpose_y_0 = const()[name = string("op_15716_transpose_y_0"), val = bool(true)]; tensor var_15716 = matmul(transpose_x = var_15716_transpose_x_0, transpose_y = var_15716_transpose_y_0, x = query_states_219, y = key_states_277_cast_fp16)[name = string("op_15716")]; fp16 var_15717_to_fp16 = const()[name = string("op_15717_to_fp16"), val = fp16(0x1.6ap-4)]; tensor attn_weights_109_cast_fp16 = mul(x = var_15716, y = var_15717_to_fp16)[name = string("attn_weights_109_cast_fp16")]; tensor attn_weights_cast_fp16 = add(x = attn_weights_109_cast_fp16, y = causal_mask)[name = string("attn_weights_cast_fp16")]; int32 var_15752 = const()[name = string("op_15752"), val = int32(-1)]; tensor var_15754_cast_fp16 = softmax(axis = var_15752, x = attn_weights_cast_fp16)[name = string("op_15754_cast_fp16")]; tensor concat_498 = const()[name = string("concat_498"), val = tensor([16, 64, 1024])]; tensor reshape_81_cast_fp16 = reshape(shape = concat_498, x = var_15754_cast_fp16)[name = string("reshape_81_cast_fp16")]; tensor concat_499 = const()[name = string("concat_499"), val = tensor([16, 1024, 128])]; tensor reshape_82_cast_fp16 = reshape(shape = concat_499, x = x_555_cast_fp16)[name = string("reshape_82_cast_fp16")]; bool matmul_27_transpose_x_0 = const()[name = string("matmul_27_transpose_x_0"), val = bool(false)]; bool matmul_27_transpose_y_0 = const()[name = string("matmul_27_transpose_y_0"), val = bool(false)]; tensor matmul_27_cast_fp16 = matmul(transpose_x = matmul_27_transpose_x_0, transpose_y = matmul_27_transpose_y_0, x = reshape_81_cast_fp16, y = reshape_82_cast_fp16)[name = string("matmul_27_cast_fp16")]; tensor concat_503 = const()[name = string("concat_503"), val = tensor([1, 16, 64, 128])]; tensor reshape_83_cast_fp16 = reshape(shape = concat_503, x = matmul_27_cast_fp16)[name = string("reshape_83_cast_fp16")]; tensor var_15766_perm_0 = const()[name = string("op_15766_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_15785 = const()[name = string("op_15785"), val = tensor([1, 64, 2048])]; tensor var_15766_cast_fp16 = transpose(perm = var_15766_perm_0, x = reshape_83_cast_fp16)[name = string("transpose_4")]; tensor attn_output_275_cast_fp16 = reshape(shape = var_15785, x = var_15766_cast_fp16)[name = string("attn_output_275_cast_fp16")]; tensor var_15790 = const()[name = string("op_15790"), val = tensor([0, 2, 1])]; string var_15806_pad_type_0 = const()[name = string("op_15806_pad_type_0"), val = string("valid")]; int32 var_15806_groups_0 = const()[name = string("op_15806_groups_0"), val = int32(1)]; tensor var_15806_strides_0 = const()[name = string("op_15806_strides_0"), val = tensor([1])]; tensor var_15806_pad_0 = const()[name = string("op_15806_pad_0"), val = tensor([0, 0])]; tensor var_15806_dilations_0 = const()[name = string("op_15806_dilations_0"), val = tensor([1])]; tensor squeeze_27_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1096787456))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1099933248))))[name = string("squeeze_27_cast_fp16_to_fp32_to_fp16_palettized")]; tensor var_15791_cast_fp16 = transpose(perm = var_15790, x = attn_output_275_cast_fp16)[name = string("transpose_3")]; tensor var_15806_cast_fp16 = conv(dilations = var_15806_dilations_0, groups = var_15806_groups_0, pad = var_15806_pad_0, pad_type = var_15806_pad_type_0, strides = var_15806_strides_0, weight = squeeze_27_cast_fp16_to_fp32_to_fp16_palettized, x = var_15791_cast_fp16)[name = string("op_15806_cast_fp16")]; tensor var_15810 = const()[name = string("op_15810"), val = tensor([0, 2, 1])]; tensor attn_output_cast_fp16 = transpose(perm = var_15810, x = var_15806_cast_fp16)[name = string("transpose_2")]; tensor hidden_states_cast_fp16 = add(x = hidden_states_163_cast_fp16, y = attn_output_cast_fp16)[name = string("hidden_states_cast_fp16")]; tensor mean_axes_0 = const()[name = string("mean_axes_0"), val = tensor([-1])]; bool mean_keep_dims_0 = const()[name = string("mean_keep_dims_0"), val = bool(true)]; tensor mean_cast_fp16 = reduce_mean(axes = mean_axes_0, keep_dims = mean_keep_dims_0, x = hidden_states_cast_fp16)[name = string("mean_cast_fp16")]; tensor input_497_cast_fp16 = sub(x = hidden_states_cast_fp16, y = mean_cast_fp16)[name = string("input_497_cast_fp16")]; tensor var_15829_axes_0 = const()[name = string("op_15829_axes_0"), val = tensor([-1])]; tensor model_model_layers_27_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_27_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1099966080)))]; fp16 var_15817_to_fp16 = const()[name = string("op_15817_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_15829_cast_fp16 = layer_norm(axes = var_15829_axes_0, epsilon = var_15817_to_fp16, gamma = model_model_layers_27_post_attention_layernorm_weight_to_fp16, x = input_497_cast_fp16)[name = string("op_15829_cast_fp16")]; tensor var_15843 = const()[name = string("op_15843"), val = tensor([0, 2, 1])]; tensor input_499_axes_0 = const()[name = string("input_499_axes_0"), val = tensor([2])]; tensor var_15844 = transpose(perm = var_15843, x = var_15829_cast_fp16)[name = string("transpose_1")]; tensor input_499 = expand_dims(axes = input_499_axes_0, x = var_15844)[name = string("input_499")]; string input_501_pad_type_0 = const()[name = string("input_501_pad_type_0"), val = string("valid")]; tensor input_501_strides_0 = const()[name = string("input_501_strides_0"), val = tensor([1, 1])]; tensor input_501_pad_0 = const()[name = string("input_501_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_501_dilations_0 = const()[name = string("input_501_dilations_0"), val = tensor([1, 1])]; int32 input_501_groups_0 = const()[name = string("input_501_groups_0"), val = int32(1)]; tensor input_501 = conv(dilations = input_501_dilations_0, groups = input_501_groups_0, pad = input_501_pad_0, pad_type = input_501_pad_type_0, strides = input_501_strides_0, weight = model_model_layers_27_mlp_gate_proj_weight_palettized, x = input_499)[name = string("input_501")]; string b_pad_type_0 = const()[name = string("b_pad_type_0"), val = string("valid")]; tensor b_strides_0 = const()[name = string("b_strides_0"), val = tensor([1, 1])]; tensor b_pad_0 = const()[name = string("b_pad_0"), val = tensor([0, 0, 0, 0])]; tensor b_dilations_0 = const()[name = string("b_dilations_0"), val = tensor([1, 1])]; int32 b_groups_0 = const()[name = string("b_groups_0"), val = int32(1)]; tensor b = conv(dilations = b_dilations_0, groups = b_groups_0, pad = b_pad_0, pad_type = b_pad_type_0, strides = b_strides_0, weight = model_model_layers_27_mlp_up_proj_weight_palettized, x = input_499)[name = string("b")]; tensor c = silu(x = input_501)[name = string("c")]; tensor input = mul(x = c, y = b)[name = string("input")]; string e_pad_type_0 = const()[name = string("e_pad_type_0"), val = string("valid")]; tensor e_strides_0 = const()[name = string("e_strides_0"), val = tensor([1, 1])]; tensor e_pad_0 = const()[name = string("e_pad_0"), val = tensor([0, 0, 0, 0])]; tensor e_dilations_0 = const()[name = string("e_dilations_0"), val = tensor([1, 1])]; int32 e_groups_0 = const()[name = string("e_groups_0"), val = int32(1)]; tensor e = conv(dilations = e_dilations_0, groups = e_groups_0, pad = e_pad_0, pad_type = e_pad_type_0, strides = e_strides_0, weight = model_model_layers_27_mlp_down_proj_weight_palettized, x = input)[name = string("e")]; tensor var_15866_axes_0 = const()[name = string("op_15866_axes_0"), val = tensor([2])]; tensor var_15866 = squeeze(axes = var_15866_axes_0, x = e)[name = string("op_15866")]; tensor var_15867 = const()[name = string("op_15867"), val = tensor([0, 2, 1])]; tensor var_15868 = transpose(perm = var_15867, x = var_15866)[name = string("transpose_0")]; tensor out_cast_fp16 = add(x = hidden_states_cast_fp16, y = var_15868)[name = string("out_cast_fp16")]; tensor var_15880_begin_0 = const()[name = string("op_15880_begin_0"), val = tensor([0, 0, 0])]; tensor var_15880_end_0 = const()[name = string("op_15880_end_0"), val = tensor([1, 1, 2048])]; tensor var_15880_end_mask_0 = const()[name = string("op_15880_end_mask_0"), val = tensor([true, false, true])]; tensor output_hidden_states = slice_by_index(begin = var_15880_begin_0, end = var_15880_end_0, end_mask = var_15880_end_mask_0, x = out_cast_fp16)[name = string("op_15880_cast_fp16")]; } -> (output_hidden_states); }