diff --git "a/openai_whisper-large-v3-v20240930_626MB/TextDecoder.mlmodelc/model.mil" "b/openai_whisper-large-v3-v20240930_626MB/TextDecoder.mlmodelc/model.mil"
deleted file mode 100644--- "a/openai_whisper-large-v3-v20240930_626MB/TextDecoder.mlmodelc/model.mil"
+++ /dev/null
@@ -1,941 +0,0 @@
-program(1.3)
-[buildInfo = dict<string, string>({{"coremlc-component-MIL", "3400.43.1"}, {"coremlc-version", "3400.58.2"}})]
-{
-    func main<ios18>(tensor<int32, [1]> cache_length, tensor<fp16, [1, 448]> decoder_key_padding_mask, state<tensor<fp16, [4, 1280, 1, 1536]>> encoder_attn_key_cache, state<tensor<fp16, [1, 1536]>> encoder_attn_key_padding_mask, state<tensor<fp16, [4, 1280, 1, 1536]>> encoder_attn_value_cache, tensor<int32, [1]> input_ids, tensor<fp16, [1, 448]> kv_cache_update_mask, state<tensor<fp16, [4, 1280, 1, 448]>> self_attn_key_cache, state<tensor<fp16, [4, 1280, 1, 448]>> self_attn_value_cache) {
-            int32 var_26_axis_0 = const()[name = string("op_26_axis_0"), val = int32(0)];
-            int32 var_26_batch_dims_0 = const()[name = string("op_26_batch_dims_0"), val = int32(0)];
-            bool var_26_validate_indices_0 = const()[name = string("op_26_validate_indices_0"), val = bool(false)];
-            tensor<fp16, [51866, 1280]> embed_tokens_weight_to_fp16 = const()[name = string("embed_tokens_weight_to_fp16"), val = tensor<fp16, [51866, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64)))];
-            tensor<fp16, [1, 1280]> var_26_cast_fp16 = gather(axis = var_26_axis_0, batch_dims = var_26_batch_dims_0, indices = input_ids, validate_indices = var_26_validate_indices_0, x = embed_tokens_weight_to_fp16)[name = string("op_26_cast_fp16")];
-            int32 var_33_axis_0 = const()[name = string("op_33_axis_0"), val = int32(0)];
-            int32 var_33_batch_dims_0 = const()[name = string("op_33_batch_dims_0"), val = int32(0)];
-            bool var_33_validate_indices_0 = const()[name = string("op_33_validate_indices_0"), val = bool(false)];
-            tensor<fp16, [448, 1280]> embed_positions_inlier_module_weight_to_fp16 = const()[name = string("embed_positions_inlier_module_weight_to_fp16"), val = tensor<fp16, [448, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(132777088)))];
-            string cache_length_to_uint16_dtype_0 = const()[name = string("cache_length_to_uint16_dtype_0"), val = string("uint16")];
-            tensor<uint16, [1]> cache_length_to_uint16 = cast(dtype = cache_length_to_uint16_dtype_0, x = cache_length)[name = string("cast_0")];
-            tensor<fp16, [1, 1280]> var_33_cast_fp16_cast_uint16 = gather(axis = var_33_axis_0, batch_dims = var_33_batch_dims_0, indices = cache_length_to_uint16, validate_indices = var_33_validate_indices_0, x = embed_positions_inlier_module_weight_to_fp16)[name = string("op_33_cast_fp16_cast_uint16")];
-            int32 var_35_axis_0 = const()[name = string("op_35_axis_0"), val = int32(0)];
-            int32 var_35_batch_dims_0 = const()[name = string("op_35_batch_dims_0"), val = int32(0)];
-            bool var_35_validate_indices_0 = const()[name = string("op_35_validate_indices_0"), val = bool(false)];
-            tensor<fp16, [448, 1280]> embed_positions_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [448, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(133941312))), nonzero_data = tensor<fp16, [8582]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(133924032))))[name = string("embed_positions_outlier_module_weight_to_fp16_sparsified")];
-            tensor<fp16, [1, 1280]> var_35_cast_fp16_cast_uint16 = gather(axis = var_35_axis_0, batch_dims = var_35_batch_dims_0, indices = cache_length_to_uint16, validate_indices = var_35_validate_indices_0, x = embed_positions_outlier_module_weight_to_fp16_sparsified)[name = string("op_35_cast_fp16_cast_uint16")];
-            tensor<fp16, [1, 1280]> var_36_cast_fp16 = add(x = var_33_cast_fp16_cast_uint16, y = var_35_cast_fp16_cast_uint16)[name = string("op_36_cast_fp16")];
-            tensor<fp16, [1, 1280]> hidden_states_1_cast_fp16 = add(x = var_26_cast_fp16, y = var_36_cast_fp16)[name = string("hidden_states_1_cast_fp16")];
-            tensor<int32, [1]> var_50_axes_0 = const()[name = string("op_50_axes_0"), val = tensor<int32, [1]>([2])];
-            tensor<fp16, [1, 1280, 1]> var_50_cast_fp16 = expand_dims(axes = var_50_axes_0, x = hidden_states_1_cast_fp16)[name = string("op_50_cast_fp16")];
-            tensor<int32, [1]> inputs_1_axes_0 = const()[name = string("inputs_1_axes_0"), val = tensor<int32, [1]>([3])];
-            tensor<fp16, [1, 1280, 1, 1]> inputs_1_cast_fp16 = expand_dims(axes = inputs_1_axes_0, x = var_50_cast_fp16)[name = string("inputs_1_cast_fp16")];
-            tensor<fp16, [4, 1280, 1, 448]> read_state_0 = read_state(input = self_attn_key_cache)[name = string("read_state_0")];
-            tensor<int32, [4]> tile_0 = const()[name = string("tile_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
-            int32 var_55_axis_0 = const()[name = string("op_55_axis_0"), val = int32(0)];
-            tensor<fp16, [1, 1280, 1, 448]> var_55_cast_fp16_0, tensor<fp16, [1, 1280, 1, 448]> var_55_cast_fp16_1, tensor<fp16, [1, 1280, 1, 448]> var_55_cast_fp16_2, tensor<fp16, [1, 1280, 1, 448]> var_55_cast_fp16_3 = split(axis = var_55_axis_0, split_sizes = tile_0, x = read_state_0)[name = string("op_55_cast_fp16")];
-            tensor<fp16, [4, 1280, 1, 448]> read_state_1 = read_state(input = self_attn_value_cache)[name = string("read_state_1")];
-            tensor<int32, [4]> tile_1 = const()[name = string("tile_1"), val = tensor<int32, [4]>([1, 1, 1, 1])];
-            int32 var_62_axis_0 = const()[name = string("op_62_axis_0"), val = int32(0)];
-            tensor<fp16, [1, 1280, 1, 448]> var_62_cast_fp16_0, tensor<fp16, [1, 1280, 1, 448]> var_62_cast_fp16_1, tensor<fp16, [1, 1280, 1, 448]> var_62_cast_fp16_2, tensor<fp16, [1, 1280, 1, 448]> var_62_cast_fp16_3 = split(axis = var_62_axis_0, split_sizes = tile_1, x = read_state_1)[name = string("op_62_cast_fp16")];
-            tensor<fp16, [4, 1280, 1, 1536]> read_state_2 = read_state(input = encoder_attn_key_cache)[name = string("read_state_2")];
-            tensor<int32, [4]> obj_17_begin_0 = const()[name = string("obj_17_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
-            tensor<int32, [4]> obj_17_end_0 = const()[name = string("obj_17_end_0"), val = tensor<int32, [4]>([1, 1280, 1, 1536])];
-            tensor<bool, [4]> obj_17_end_mask_0 = const()[name = string("obj_17_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
-            tensor<fp16, [1, 1280, 1, 1536]> obj_17_cast_fp16 = slice_by_index(begin = obj_17_begin_0, end = obj_17_end_0, end_mask = obj_17_end_mask_0, x = read_state_2)[name = string("obj_17_cast_fp16")];
-            tensor<fp16, [4, 1280, 1, 1536]> read_state_3 = read_state(input = encoder_attn_value_cache)[name = string("read_state_3")];
-            tensor<int32, [4]> obj_19_begin_0 = const()[name = string("obj_19_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
-            tensor<int32, [4]> obj_19_end_0 = const()[name = string("obj_19_end_0"), val = tensor<int32, [4]>([1, 1280, 1, 1536])];
-            tensor<bool, [4]> obj_19_end_mask_0 = const()[name = string("obj_19_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
-            tensor<fp16, [1, 1280, 1, 1536]> obj_19_cast_fp16 = slice_by_index(begin = obj_19_begin_0, end = obj_19_end_0, end_mask = obj_19_end_mask_0, x = read_state_3)[name = string("obj_19_cast_fp16")];
-            int32 var_82 = const()[name = string("op_82"), val = int32(3)];
-            tensor<int32, [1]> out_1_axes_0 = const()[name = string("out_1_axes_0"), val = tensor<int32, [1]>([1])];
-            fp16 var_107_to_fp16 = const()[name = string("op_107_to_fp16"), val = fp16(0x1.5p-17)];
-            tensor<fp16, [1, 1280, 1, 1]> out_1_cast_fp16 = layer_norm(axes = out_1_axes_0, epsilon = var_107_to_fp16, x = inputs_1_cast_fp16)[name = string("out_1_cast_fp16")];
-            tensor<fp16, [1280]> obj_5_mean_0_to_fp16 = const()[name = string("obj_5_mean_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(134013056)))];
-            tensor<fp16, [1280]> obj_5_variance_0_to_fp16 = const()[name = string("obj_5_variance_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(134015680)))];
-            tensor<fp16, [1280]> obj_5_gamma_0_to_fp16 = const()[name = string("obj_5_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(134018304)))];
-            tensor<fp16, [1280]> obj_5_beta_0_to_fp16 = const()[name = string("obj_5_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(134020928)))];
-            fp16 obj_5_epsilon_0_to_fp16 = const()[name = string("obj_5_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)];
-            tensor<fp16, [1, 1280, 1, 1]> obj_5_cast_fp16 = batch_norm(beta = obj_5_beta_0_to_fp16, epsilon = obj_5_epsilon_0_to_fp16, gamma = obj_5_gamma_0_to_fp16, mean = obj_5_mean_0_to_fp16, variance = obj_5_variance_0_to_fp16, x = out_1_cast_fp16)[name = string("obj_5_cast_fp16")];
-            string var_129_pad_type_0 = const()[name = string("op_129_pad_type_0"), val = string("valid")];
-            tensor<int32, [2]> var_129_strides_0 = const()[name = string("op_129_strides_0"), val = tensor<int32, [2]>([1, 1])];
-            tensor<int32, [4]> var_129_pad_0 = const()[name = string("op_129_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
-            tensor<int32, [2]> var_129_dilations_0 = const()[name = string("op_129_dilations_0"), val = tensor<int32, [2]>([1, 1])];
-            int32 var_129_groups_0 = const()[name = string("op_129_groups_0"), val = int32(1)];
-            tensor<fp16, [1280, 1280, 1, 1]> layers_0_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(134023552))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(134842816))))[name = string("layers_0_self_attn_q_proj_inlier_module_weight_to_fp16_palettized")];
-            tensor<fp16, [1280]> layers_0_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_0_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(134842944)))];
-            tensor<fp16, [1, 1280, 1, 1]> var_129_cast_fp16 = conv(bias = layers_0_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_129_dilations_0, groups = var_129_groups_0, pad = var_129_pad_0, pad_type = var_129_pad_type_0, strides = var_129_strides_0, weight = layers_0_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_5_cast_fp16)[name = string("op_129_cast_fp16")];
-            string var_135_pad_type_0 = const()[name = string("op_135_pad_type_0"), val = string("valid")];
-            tensor<int32, [2]> var_135_strides_0 = const()[name = string("op_135_strides_0"), val = tensor<int32, [2]>([1, 1])];
-            tensor<int32, [4]> var_135_pad_0 = const()[name = string("op_135_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
-            tensor<int32, [2]> var_135_dilations_0 = const()[name = string("op_135_dilations_0"), val = tensor<int32, [2]>([1, 1])];
-            int32 var_135_groups_0 = const()[name = string("op_135_groups_0"), val = int32(1)];
-            tensor<fp16, [1280, 1280, 1, 1]> layers_0_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(134918592))), nonzero_data = tensor<fp16, [36461]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(134845568))))[name = string("layers_0_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified")];
-            tensor<fp16, [1, 1280, 1, 1]> var_135_cast_fp16 = conv(dilations = var_135_dilations_0, groups = var_135_groups_0, pad = var_135_pad_0, pad_type = var_135_pad_type_0, strides = var_135_strides_0, weight = layers_0_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_5_cast_fp16)[name = string("op_135_cast_fp16")];
-            tensor<fp16, [1, 1280, 1, 1]> query_1_cast_fp16 = add(x = var_129_cast_fp16, y = var_135_cast_fp16)[name = string("query_1_cast_fp16")];
-            string var_144_pad_type_0 = const()[name = string("op_144_pad_type_0"), val = string("valid")];
-            tensor<int32, [2]> var_144_strides_0 = const()[name = string("op_144_strides_0"), val = tensor<int32, [2]>([1, 1])];
-            tensor<int32, [4]> var_144_pad_0 = const()[name = string("op_144_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
-            tensor<int32, [2]> var_144_dilations_0 = const()[name = string("op_144_dilations_0"), val = tensor<int32, [2]>([1, 1])];
-            int32 var_144_groups_0 = const()[name = string("op_144_groups_0"), val = int32(1)];
-            tensor<fp16, [1280, 1280, 1, 1]> layers_0_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(135123456))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(135942720))))[name = string("layers_0_self_attn_k_proj_inlier_module_weight_to_fp16_palettized")];
-            tensor<fp16, [1, 1280, 1, 1]> var_144_cast_fp16 = conv(dilations = var_144_dilations_0, groups = var_144_groups_0, pad = var_144_pad_0, pad_type = var_144_pad_type_0, strides = var_144_strides_0, weight = layers_0_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_5_cast_fp16)[name = string("op_144_cast_fp16")];
-            string var_150_pad_type_0 = const()[name = string("op_150_pad_type_0"), val = string("valid")];
-            tensor<int32, [2]> var_150_strides_0 = const()[name = string("op_150_strides_0"), val = tensor<int32, [2]>([1, 1])];
-            tensor<int32, [4]> var_150_pad_0 = const()[name = string("op_150_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
-            tensor<int32, [2]> var_150_dilations_0 = const()[name = string("op_150_dilations_0"), val = tensor<int32, [2]>([1, 1])];
-            int32 var_150_groups_0 = const()[name = string("op_150_groups_0"), val = int32(1)];
-            tensor<fp16, [1280, 1280, 1, 1]> layers_0_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(135976320))), nonzero_data = tensor<fp16, [16673]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(135942848))))[name = string("layers_0_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified")];
-            tensor<fp16, [1, 1280, 1, 1]> var_150_cast_fp16 = conv(dilations = var_150_dilations_0, groups = var_150_groups_0, pad = var_150_pad_0, pad_type = var_150_pad_type_0, strides = var_150_strides_0, weight = layers_0_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_5_cast_fp16)[name = string("op_150_cast_fp16")];
-            tensor<fp16, [1, 1280, 1, 1]> current_key_1_cast_fp16 = add(x = var_144_cast_fp16, y = var_150_cast_fp16)[name = string("current_key_1_cast_fp16")];
-            string var_160_pad_type_0 = const()[name = string("op_160_pad_type_0"), val = string("valid")];
-            tensor<int32, [2]> var_160_strides_0 = const()[name = string("op_160_strides_0"), val = tensor<int32, [2]>([1, 1])];
-            tensor<int32, [4]> var_160_pad_0 = const()[name = string("op_160_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
-            tensor<int32, [2]> var_160_dilations_0 = const()[name = string("op_160_dilations_0"), val = tensor<int32, [2]>([1, 1])];
-            int32 var_160_groups_0 = const()[name = string("op_160_groups_0"), val = int32(1)];
-            tensor<fp16, [1280, 1280, 1, 1]> layers_0_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(136181184))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(137000448))))[name = string("layers_0_self_attn_v_proj_inlier_module_weight_to_fp16_palettized")];
-            tensor<fp16, [1280]> layers_0_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_0_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(137000576)))];
-            tensor<fp16, [1, 1280, 1, 1]> var_160_cast_fp16 = conv(bias = layers_0_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_160_dilations_0, groups = var_160_groups_0, pad = var_160_pad_0, pad_type = var_160_pad_type_0, strides = var_160_strides_0, weight = layers_0_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_5_cast_fp16)[name = string("op_160_cast_fp16")];
-            string var_166_pad_type_0 = const()[name = string("op_166_pad_type_0"), val = string("valid")];
-            tensor<int32, [2]> var_166_strides_0 = const()[name = string("op_166_strides_0"), val = tensor<int32, [2]>([1, 1])];
-            tensor<int32, [4]> var_166_pad_0 = const()[name = string("op_166_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
-            tensor<int32, [2]> var_166_dilations_0 = const()[name = string("op_166_dilations_0"), val = tensor<int32, [2]>([1, 1])];
-            int32 var_166_groups_0 = const()[name = string("op_166_groups_0"), val = int32(1)];
-            tensor<fp16, [1280, 1280, 1, 1]> layers_0_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(137046720))), nonzero_data = tensor<fp16, [21721]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(137003200))))[name = string("layers_0_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified")];
-            tensor<fp16, [1, 1280, 1, 1]> var_166_cast_fp16 = conv(dilations = var_166_dilations_0, groups = var_166_groups_0, pad = var_166_pad_0, pad_type = var_166_pad_type_0, strides = var_166_strides_0, weight = layers_0_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_5_cast_fp16)[name = string("op_166_cast_fp16")];
-            tensor<fp16, [1, 1280, 1, 1]> current_value_1_cast_fp16 = add(x = var_160_cast_fp16, y = var_166_cast_fp16)[name = string("current_value_1_cast_fp16")];
-            tensor<int32, [1]> var_169_axes_0 = const()[name = string("op_169_axes_0"), val = tensor<int32, [1]>([1])];
-            tensor<fp16, [1, 1, 448]> var_169_cast_fp16 = expand_dims(axes = var_169_axes_0, x = kv_cache_update_mask)[name = string("op_169_cast_fp16")];
-            tensor<int32, [1]> var_170_axes_0 = const()[name = string("op_170_axes_0"), val = tensor<int32, [1]>([2])];
-            tensor<fp16, [1, 1, 1, 448]> var_170_cast_fp16 = expand_dims(axes = var_170_axes_0, x = var_169_cast_fp16)[name = string("op_170_cast_fp16")];
-            tensor<fp16, [1, 1280, 1, 448]> var_172_cast_fp16 = mul(x = current_key_1_cast_fp16, y = var_170_cast_fp16)[name = string("op_172_cast_fp16")];
-            tensor<fp16, [1, 1280, 1, 448]> key_1_cast_fp16 = add(x = var_55_cast_fp16_0, y = var_172_cast_fp16)[name = string("key_1_cast_fp16")];
-            tensor<fp16, [1, 1280, 1, 448]> var_174_cast_fp16 = mul(x = current_value_1_cast_fp16, y = var_170_cast_fp16)[name = string("op_174_cast_fp16")];
-            tensor<fp16, [1, 1280, 1, 448]> value_1_cast_fp16 = add(x = var_62_cast_fp16_0, y = var_174_cast_fp16)[name = string("value_1_cast_fp16")];
-            tensor<int32, [4]> var_177 = const()[name = string("op_177"), val = tensor<int32, [4]>([1, 20, 64, -1])];
-            tensor<fp16, [1, 20, 64, 1]> mh_q_1_cast_fp16 = reshape(shape = var_177, x = query_1_cast_fp16)[name = string("mh_q_1_cast_fp16")];
-            fp16 var_179_to_fp16 = const()[name = string("op_179_to_fp16"), val = fp16(0x1p-3)];
-            tensor<fp16, [1, 20, 64, 1]> var_180_cast_fp16 = mul(x = mh_q_1_cast_fp16, y = var_179_to_fp16)[name = string("op_180_cast_fp16")];
-            tensor<int32, [4]> var_181 = const()[name = string("op_181"), val = tensor<int32, [4]>([1, 20, 64, -1])];
-            tensor<fp16, [1, 20, 64, 448]> var_182_cast_fp16 = reshape(shape = var_181, x = key_1_cast_fp16)[name = string("op_182_cast_fp16")];
-            bool mh_w_1_transpose_x_0 = const()[name = string("mh_w_1_transpose_x_0"), val = bool(true)];
-            bool mh_w_1_transpose_y_0 = const()[name = string("mh_w_1_transpose_y_0"), val = bool(false)];
-            tensor<fp16, [1, 20, 1, 448]> mh_w_1_cast_fp16 = matmul(transpose_x = mh_w_1_transpose_x_0, transpose_y = mh_w_1_transpose_y_0, x = var_180_cast_fp16, y = var_182_cast_fp16)[name = string("mh_w_1_cast_fp16")];
-            tensor<int32, [1]> var_186_axes_0 = const()[name = string("op_186_axes_0"), val = tensor<int32, [1]>([1])];
-            tensor<fp16, [1, 1, 448]> var_186_cast_fp16 = expand_dims(axes = var_186_axes_0, x = decoder_key_padding_mask)[name = string("op_186_cast_fp16")];
-            tensor<int32, [1]> var_187_axes_0 = const()[name = string("op_187_axes_0"), val = tensor<int32, [1]>([2])];
-            tensor<fp16, [1, 1, 1, 448]> var_187_cast_fp16 = expand_dims(axes = var_187_axes_0, x = var_186_cast_fp16)[name = string("op_187_cast_fp16")];
-            tensor<fp16, [1, 20, 1, 448]> mh_w_3_cast_fp16 = add(x = mh_w_1_cast_fp16, y = var_187_cast_fp16)[name = string("mh_w_3_cast_fp16")];
-            tensor<fp16, [1, 20, 1, 448]> var_190_cast_fp16 = softmax(axis = var_82, x = mh_w_3_cast_fp16)[name = string("op_190_cast_fp16")];
-            tensor<int32, [4]> var_191 = const()[name = string("op_191"), val = tensor<int32, [4]>([1, 20, 64, -1])];
-            tensor<fp16, [1, 20, 64, 448]> var_192_cast_fp16 = reshape(shape = var_191, x = value_1_cast_fp16)[name = string("op_192_cast_fp16")];
-            bool attn_1_transpose_x_0 = const()[name = string("attn_1_transpose_x_0"), val = bool(false)];
-            bool attn_1_transpose_y_0 = const()[name = string("attn_1_transpose_y_0"), val = bool(true)];
-            tensor<fp16, [1, 20, 64, 1]> attn_1_cast_fp16 = matmul(transpose_x = attn_1_transpose_x_0, transpose_y = attn_1_transpose_y_0, x = var_192_cast_fp16, y = var_190_cast_fp16)[name = string("attn_1_cast_fp16")];
-            tensor<int32, [4]> var_195 = const()[name = string("op_195"), val = tensor<int32, [4]>([1, 1280, 1, -1])];
-            tensor<fp16, [1, 1280, 1, 1]> input_1_cast_fp16 = reshape(shape = var_195, x = attn_1_cast_fp16)[name = string("input_1_cast_fp16")];
-            string var_205_pad_type_0 = const()[name = string("op_205_pad_type_0"), val = string("valid")];
-            tensor<int32, [2]> var_205_strides_0 = const()[name = string("op_205_strides_0"), val = tensor<int32, [2]>([1, 1])];
-            tensor<int32, [4]> var_205_pad_0 = const()[name = string("op_205_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
-            tensor<int32, [2]> var_205_dilations_0 = const()[name = string("op_205_dilations_0"), val = tensor<int32, [2]>([1, 1])];
-            int32 var_205_groups_0 = const()[name = string("op_205_groups_0"), val = int32(1)];
-            tensor<fp16, [1280, 1280, 1, 1]> layers_0_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(137251584))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(138070848))))[name = string("layers_0_self_attn_o_proj_inlier_module_weight_to_fp16_palettized")];
-            tensor<fp16, [1280]> layers_0_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_0_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(138070976)))];
-            tensor<fp16, [1, 1280, 1, 1]> var_205_cast_fp16 = conv(bias = layers_0_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_205_dilations_0, groups = var_205_groups_0, pad = var_205_pad_0, pad_type = var_205_pad_type_0, strides = var_205_strides_0, weight = layers_0_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_1_cast_fp16)[name = string("op_205_cast_fp16")];
-            string var_211_pad_type_0 = const()[name = string("op_211_pad_type_0"), val = string("valid")];
-            tensor<int32, [2]> var_211_strides_0 = const()[name = string("op_211_strides_0"), val = tensor<int32, [2]>([1, 1])];
-            tensor<int32, [4]> var_211_pad_0 = const()[name = string("op_211_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
-            tensor<int32, [2]> var_211_dilations_0 = const()[name = string("op_211_dilations_0"), val = tensor<int32, [2]>([1, 1])];
-            int32 var_211_groups_0 = const()[name = string("op_211_groups_0"), val = int32(1)];
-            tensor<fp16, [1280, 1280, 1, 1]> layers_0_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(138130624))), nonzero_data = tensor<fp16, [28455]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(138073600))))[name = string("layers_0_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified")];
-            tensor<fp16, [1, 1280, 1, 1]> var_211_cast_fp16 = conv(dilations = var_211_dilations_0, groups = var_211_groups_0, pad = var_211_pad_0, pad_type = var_211_pad_type_0, strides = var_211_strides_0, weight = layers_0_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_1_cast_fp16)[name = string("op_211_cast_fp16")];
-            tensor<fp16, [1, 1280, 1, 1]> obj_11_cast_fp16 = add(x = var_205_cast_fp16, y = var_211_cast_fp16)[name = string("obj_11_cast_fp16")];
-            tensor<fp16, [1, 1280, 1, 1]> inputs_3_cast_fp16 = add(x = inputs_1_cast_fp16, y = obj_11_cast_fp16)[name = string("inputs_3_cast_fp16")];
-            tensor<int32, [1]> out_3_axes_0 = const()[name = string("out_3_axes_0"), val = tensor<int32, [1]>([1])];
-            fp16 var_226_to_fp16 = const()[name = string("op_226_to_fp16"), val = fp16(0x1.5p-17)];
-            tensor<fp16, [1, 1280, 1, 1]> out_3_cast_fp16 = layer_norm(axes = out_3_axes_0, epsilon = var_226_to_fp16, x = inputs_3_cast_fp16)[name = string("out_3_cast_fp16")];
-            tensor<fp16, [1280]> obj_13_gamma_0_to_fp16 = const()[name = string("obj_13_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(138335488)))];
-            tensor<fp16, [1280]> obj_13_beta_0_to_fp16 = const()[name = string("obj_13_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(138338112)))];
-            fp16 obj_13_epsilon_0_to_fp16 = const()[name = string("obj_13_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)];
-            tensor<fp16, [1, 1280, 1, 1]> obj_13_cast_fp16 = batch_norm(beta = obj_13_beta_0_to_fp16, epsilon = obj_13_epsilon_0_to_fp16, gamma = obj_13_gamma_0_to_fp16, mean = obj_5_mean_0_to_fp16, variance = obj_5_variance_0_to_fp16, x = out_3_cast_fp16)[name = string("obj_13_cast_fp16")];
-            string var_246_pad_type_0 = const()[name = string("op_246_pad_type_0"), val = string("valid")];
-            tensor<int32, [2]> var_246_strides_0 = const()[name = string("op_246_strides_0"), val = tensor<int32, [2]>([1, 1])];
-            tensor<int32, [4]> var_246_pad_0 = const()[name = string("op_246_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
-            tensor<int32, [2]> var_246_dilations_0 = const()[name = string("op_246_dilations_0"), val = tensor<int32, [2]>([1, 1])];
-            int32 var_246_groups_0 = const()[name = string("op_246_groups_0"), val = int32(1)];
-            tensor<fp16, [1280, 1280, 1, 1]> layers_0_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(138340736))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(139160000))))[name = string("layers_0_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized")];
-            tensor<fp16, [1280]> layers_0_encoder_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_0_encoder_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(139160128)))];
-            tensor<fp16, [1, 1280, 1, 1]> var_246_cast_fp16 = conv(bias = layers_0_encoder_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_246_dilations_0, groups = var_246_groups_0, pad = var_246_pad_0, pad_type = var_246_pad_type_0, strides = var_246_strides_0, weight = layers_0_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_13_cast_fp16)[name = string("op_246_cast_fp16")];
-            string var_252_pad_type_0 = const()[name = string("op_252_pad_type_0"), val = string("valid")];
-            tensor<int32, [2]> var_252_strides_0 = const()[name = string("op_252_strides_0"), val = tensor<int32, [2]>([1, 1])];
-            tensor<int32, [4]> var_252_pad_0 = const()[name = string("op_252_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
-            tensor<int32, [2]> var_252_dilations_0 = const()[name = string("op_252_dilations_0"), val = tensor<int32, [2]>([1, 1])];
-            int32 var_252_groups_0 = const()[name = string("op_252_groups_0"), val = int32(1)];
-            tensor<fp16, [1280, 1280, 1, 1]> layers_0_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(139188224))), nonzero_data = tensor<fp16, [12701]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(139162752))))[name = string("layers_0_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified")];
-            tensor<fp16, [1, 1280, 1, 1]> var_252_cast_fp16 = conv(dilations = var_252_dilations_0, groups = var_252_groups_0, pad = var_252_pad_0, pad_type = var_252_pad_type_0, strides = var_252_strides_0, weight = layers_0_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_13_cast_fp16)[name = string("op_252_cast_fp16")];
-            tensor<fp16, [1, 1280, 1, 1]> query_3_cast_fp16 = add(x = var_246_cast_fp16, y = var_252_cast_fp16)[name = string("query_3_cast_fp16")];
-            tensor<int32, [4]> var_255 = const()[name = string("op_255"), val = tensor<int32, [4]>([1, 20, 64, -1])];
-            tensor<fp16, [1, 20, 64, 1]> mh_q_3_cast_fp16 = reshape(shape = var_255, x = query_3_cast_fp16)[name = string("mh_q_3_cast_fp16")];
-            fp16 var_257_to_fp16 = const()[name = string("op_257_to_fp16"), val = fp16(0x1p-3)];
-            tensor<fp16, [1, 20, 64, 1]> var_258_cast_fp16 = mul(x = mh_q_3_cast_fp16, y = var_257_to_fp16)[name = string("op_258_cast_fp16")];
-            tensor<int32, [4]> var_259 = const()[name = string("op_259"), val = tensor<int32, [4]>([1, 20, 64, -1])];
-            tensor<fp16, [1, 20, 64, 1536]> var_260_cast_fp16 = reshape(shape = var_259, x = obj_17_cast_fp16)[name = string("op_260_cast_fp16")];
-            bool mh_w_5_transpose_x_0 = const()[name = string("mh_w_5_transpose_x_0"), val = bool(true)];
-            bool mh_w_5_transpose_y_0 = const()[name = string("mh_w_5_transpose_y_0"), val = bool(false)];
-            tensor<fp16, [1, 20, 1, 1536]> mh_w_5_cast_fp16 = matmul(transpose_x = mh_w_5_transpose_x_0, transpose_y = mh_w_5_transpose_y_0, x = var_258_cast_fp16, y = var_260_cast_fp16)[name = string("mh_w_5_cast_fp16")];
-            tensor<fp16, [1, 1536]> read_state_4 = read_state(input = encoder_attn_key_padding_mask)[name = string("read_state_4")];
-            tensor<int32, [1]> var_264_axes_0 = const()[name = string("op_264_axes_0"), val = tensor<int32, [1]>([1])];
-            tensor<fp16, [1, 1, 1536]> var_264_cast_fp16 = expand_dims(axes = var_264_axes_0, x = read_state_4)[name = string("op_264_cast_fp16")];
-            tensor<int32, [1]> var_265_axes_0 = const()[name = string("op_265_axes_0"), val = tensor<int32, [1]>([2])];
-            tensor<fp16, [1, 1, 1, 1536]> var_265_cast_fp16 = expand_dims(axes = var_265_axes_0, x = var_264_cast_fp16)[name = string("op_265_cast_fp16")];
-            tensor<fp16, [1, 20, 1, 1536]> mh_w_7_cast_fp16 = add(x = mh_w_5_cast_fp16, y = var_265_cast_fp16)[name = string("mh_w_7_cast_fp16")];
-            tensor<fp16, [1, 20, 1, 1536]> obj_23_cast_fp16 = softmax(axis = var_82, x = mh_w_7_cast_fp16)[name = string("obj_23_cast_fp16")];
-            tensor<int32, [4]> var_269 = const()[name = string("op_269"), val = tensor<int32, [4]>([1, 20, 64, -1])];
-            tensor<fp16, [1, 20, 64, 1536]> var_270_cast_fp16 = reshape(shape = var_269, x = obj_19_cast_fp16)[name = string("op_270_cast_fp16")];
-            bool attn_3_transpose_x_0 = const()[name = string("attn_3_transpose_x_0"), val = bool(false)];
-            bool attn_3_transpose_y_0 = const()[name = string("attn_3_transpose_y_0"), val = bool(true)];
-            tensor<fp16, [1, 20, 64, 1]> attn_3_cast_fp16 = matmul(transpose_x = attn_3_transpose_x_0, transpose_y = attn_3_transpose_y_0, x = var_270_cast_fp16, y = obj_23_cast_fp16)[name = string("attn_3_cast_fp16")];
-            tensor<int32, [4]> var_273 = const()[name = string("op_273"), val = tensor<int32, [4]>([1, 1280, 1, -1])];
-            tensor<fp16, [1, 1280, 1, 1]> input_3_cast_fp16 = reshape(shape = var_273, x = attn_3_cast_fp16)[name = string("input_3_cast_fp16")];
-            string var_283_pad_type_0 = const()[name = string("op_283_pad_type_0"), val = string("valid")];
-            tensor<int32, [2]> var_283_strides_0 = const()[name = string("op_283_strides_0"), val = tensor<int32, [2]>([1, 1])];
-            tensor<int32, [4]> var_283_pad_0 = const()[name = string("op_283_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
-            tensor<int32, [2]> var_283_dilations_0 = const()[name = string("op_283_dilations_0"), val = tensor<int32, [2]>([1, 1])];
-            int32 var_283_groups_0 = const()[name = string("op_283_groups_0"), val = int32(1)];
-            tensor<fp16, [1280, 1280, 1, 1]> layers_0_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(139393088))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(140212352))))[name = string("layers_0_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized")];
-            tensor<fp16, [1280]> layers_0_encoder_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_0_encoder_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(140212480)))];
-            tensor<fp16, [1, 1280, 1, 1]> var_283_cast_fp16 = conv(bias = layers_0_encoder_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_283_dilations_0, groups = var_283_groups_0, pad = var_283_pad_0, pad_type = var_283_pad_type_0, strides = var_283_strides_0, weight = layers_0_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_3_cast_fp16)[name = string("op_283_cast_fp16")];
-            string var_289_pad_type_0 = const()[name = string("op_289_pad_type_0"), val = string("valid")];
-            tensor<int32, [2]> var_289_strides_0 = const()[name = string("op_289_strides_0"), val = tensor<int32, [2]>([1, 1])];
-            tensor<int32, [4]> var_289_pad_0 = const()[name = string("op_289_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
-            tensor<int32, [2]> var_289_dilations_0 = const()[name = string("op_289_dilations_0"), val = tensor<int32, [2]>([1, 1])];
-            int32 var_289_groups_0 = const()[name = string("op_289_groups_0"), val = int32(1)];
-            tensor<fp16, [1280, 1280, 1, 1]> layers_0_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(140227264))), nonzero_data = tensor<fp16, [6041]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(140215104))))[name = string("layers_0_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified")];
-            tensor<fp16, [1, 1280, 1, 1]> var_289_cast_fp16 = conv(dilations = var_289_dilations_0, groups = var_289_groups_0, pad = var_289_pad_0, pad_type = var_289_pad_type_0, strides = var_289_strides_0, weight = layers_0_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_3_cast_fp16)[name = string("op_289_cast_fp16")];
-            tensor<fp16, [1, 1280, 1, 1]> obj_21_cast_fp16 = add(x = var_283_cast_fp16, y = var_289_cast_fp16)[name = string("obj_21_cast_fp16")];
-            tensor<fp16, [1, 1280, 1, 1]> inputs_5_cast_fp16 = add(x = inputs_3_cast_fp16, y = obj_21_cast_fp16)[name = string("inputs_5_cast_fp16")];
-            tensor<int32, [1]> out_5_axes_0 = const()[name = string("out_5_axes_0"), val = tensor<int32, [1]>([1])];
-            fp16 var_300_to_fp16 = const()[name = string("op_300_to_fp16"), val = fp16(0x1.5p-17)];
-            tensor<fp16, [1, 1280, 1, 1]> out_5_cast_fp16 = layer_norm(axes = out_5_axes_0, epsilon = var_300_to_fp16, x = inputs_5_cast_fp16)[name = string("out_5_cast_fp16")];
-            tensor<fp16, [1280]> input_5_gamma_0_to_fp16 = const()[name = string("input_5_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(140432128)))];
-            tensor<fp16, [1280]> input_5_beta_0_to_fp16 = const()[name = string("input_5_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(140434752)))];
-            fp16 input_5_epsilon_0_to_fp16 = const()[name = string("input_5_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)];
-            tensor<fp16, [1, 1280, 1, 1]> input_5_cast_fp16 = batch_norm(beta = input_5_beta_0_to_fp16, epsilon = input_5_epsilon_0_to_fp16, gamma = input_5_gamma_0_to_fp16, mean = obj_5_mean_0_to_fp16, variance = obj_5_variance_0_to_fp16, x = out_5_cast_fp16)[name = string("input_5_cast_fp16")];
-            string var_318_pad_type_0 = const()[name = string("op_318_pad_type_0"), val = string("valid")];
-            tensor<int32, [2]> var_318_strides_0 = const()[name = string("op_318_strides_0"), val = tensor<int32, [2]>([1, 1])];
-            tensor<int32, [4]> var_318_pad_0 = const()[name = string("op_318_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
-            tensor<int32, [2]> var_318_dilations_0 = const()[name = string("op_318_dilations_0"), val = tensor<int32, [2]>([1, 1])];
-            int32 var_318_groups_0 = const()[name = string("op_318_groups_0"), val = int32(1)];
-            tensor<fp16, [5120, 1280, 1, 1]> layers_0_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [5120, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(140437376))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(143714240))))[name = string("layers_0_fc1_inlier_module_weight_to_fp16_palettized")];
-            tensor<fp16, [5120]> layers_0_fc1_inlier_module_bias_to_fp16 = const()[name = string("layers_0_fc1_inlier_module_bias_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(143714368)))];
-            tensor<fp16, [1, 5120, 1, 1]> var_318_cast_fp16 = conv(bias = layers_0_fc1_inlier_module_bias_to_fp16, dilations = var_318_dilations_0, groups = var_318_groups_0, pad = var_318_pad_0, pad_type = var_318_pad_type_0, strides = var_318_strides_0, weight = layers_0_fc1_inlier_module_weight_to_fp16_palettized, x = input_5_cast_fp16)[name = string("op_318_cast_fp16")];
-            string var_324_pad_type_0 = const()[name = string("op_324_pad_type_0"), val = string("valid")];
-            tensor<int32, [2]> var_324_strides_0 = const()[name = string("op_324_strides_0"), val = tensor<int32, [2]>([1, 1])];
-            tensor<int32, [4]> var_324_pad_0 = const()[name = string("op_324_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
-            tensor<int32, [2]> var_324_dilations_0 = const()[name = string("op_324_dilations_0"), val = tensor<int32, [2]>([1, 1])];
-            int32 var_324_groups_0 = const()[name = string("op_324_groups_0"), val = int32(1)];
-            tensor<fp16, [5120, 1280, 1, 1]> layers_0_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [5120, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(143826240))), nonzero_data = tensor<fp16, [50752]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(143724672))))[name = string("layers_0_fc1_outlier_module_weight_to_fp16_sparsified")];
-            tensor<fp16, [1, 5120, 1, 1]> var_324_cast_fp16 = conv(dilations = var_324_dilations_0, groups = var_324_groups_0, pad = var_324_pad_0, pad_type = var_324_pad_type_0, strides = var_324_strides_0, weight = layers_0_fc1_outlier_module_weight_to_fp16_sparsified, x = input_5_cast_fp16)[name = string("op_324_cast_fp16")];
-            tensor<fp16, [1, 5120, 1, 1]> input_7_cast_fp16 = add(x = var_318_cast_fp16, y = var_324_cast_fp16)[name = string("input_7_cast_fp16")];
-            string input_9_mode_0 = const()[name = string("input_9_mode_0"), val = string("EXACT")];
-            tensor<fp16, [1, 5120, 1, 1]> input_9_cast_fp16 = gelu(mode = input_9_mode_0, x = input_7_cast_fp16)[name = string("input_9_cast_fp16")];
-            string var_335_pad_type_0 = const()[name = string("op_335_pad_type_0"), val = string("valid")];
-            tensor<int32, [2]> var_335_strides_0 = const()[name = string("op_335_strides_0"), val = tensor<int32, [2]>([1, 1])];
-            tensor<int32, [4]> var_335_pad_0 = const()[name = string("op_335_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
-            tensor<int32, [2]> var_335_dilations_0 = const()[name = string("op_335_dilations_0"), val = tensor<int32, [2]>([1, 1])];
-            int32 var_335_groups_0 = const()[name = string("op_335_groups_0"), val = int32(1)];
-            tensor<fp16, [1280, 5120, 1, 1]> layers_0_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1280, 5120, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(144645504))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(147922368))))[name = string("layers_0_fc2_inlier_module_weight_to_fp16_palettized")];
-            tensor<fp16, [1280]> layers_0_fc2_inlier_module_bias_to_fp16 = const()[name = string("layers_0_fc2_inlier_module_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(147922496)))];
-            tensor<fp16, [1, 1280, 1, 1]> var_335_cast_fp16 = conv(bias = layers_0_fc2_inlier_module_bias_to_fp16, dilations = var_335_dilations_0, groups = var_335_groups_0, pad = var_335_pad_0, pad_type = var_335_pad_type_0, strides = var_335_strides_0, weight = layers_0_fc2_inlier_module_weight_to_fp16_palettized, x = input_9_cast_fp16)[name = string("op_335_cast_fp16")];
-            string var_341_pad_type_0 = const()[name = string("op_341_pad_type_0"), val = string("valid")];
-            tensor<int32, [2]> var_341_strides_0 = const()[name = string("op_341_strides_0"), val = tensor<int32, [2]>([1, 1])];
-            tensor<int32, [4]> var_341_pad_0 = const()[name = string("op_341_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
-            tensor<int32, [2]> var_341_dilations_0 = const()[name = string("op_341_dilations_0"), val = tensor<int32, [2]>([1, 1])];
-            int32 var_341_groups_0 = const()[name = string("op_341_groups_0"), val = int32(1)];
-            tensor<fp16, [1280, 5120, 1, 1]> layers_0_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [1280, 5120, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(148107648))), nonzero_data = tensor<fp16, [91213]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(147925120))))[name = string("layers_0_fc2_outlier_module_weight_to_fp16_sparsified")];
-            tensor<fp16, [1, 1280, 1, 1]> var_341_cast_fp16 = conv(dilations = var_341_dilations_0, groups = var_341_groups_0, pad = var_341_pad_0, pad_type = var_341_pad_type_0, strides = var_341_strides_0, weight = layers_0_fc2_outlier_module_weight_to_fp16_sparsified, x = input_9_cast_fp16)[name = string("op_341_cast_fp16")];
-            tensor<fp16, [1, 1280, 1, 1]> hidden_states_3_cast_fp16 = add(x = var_335_cast_fp16, y = var_341_cast_fp16)[name = string("hidden_states_3_cast_fp16")];
-            tensor<fp16, [1, 1280, 1, 1]> inputs_7_cast_fp16 = add(x = inputs_5_cast_fp16, y = hidden_states_3_cast_fp16)[name = string("inputs_7_cast_fp16")];
-            tensor<int32, [4]> obj_35_begin_0 = const()[name = string("obj_35_begin_0"), val = tensor<int32, [4]>([1, 0, 0, 0])];
-            tensor<int32, [4]> obj_35_end_0 = const()[name = string("obj_35_end_0"), val = tensor<int32, [4]>([2, 1280, 1, 1536])];
-            tensor<bool, [4]> obj_35_end_mask_0 = const()[name = string("obj_35_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
-            tensor<fp16, [1, 1280, 1, 1536]> obj_35_cast_fp16 = slice_by_index(begin = obj_35_begin_0, end = obj_35_end_0, end_mask = obj_35_end_mask_0, x = read_state_2)[name = string("obj_35_cast_fp16")];
-            tensor<int32, [4]> obj_37_begin_0 = const()[name = string("obj_37_begin_0"), val = tensor<int32, [4]>([1, 0, 0, 0])];
-            tensor<int32, [4]> obj_37_end_0 = const()[name = string("obj_37_end_0"), val = tensor<int32, [4]>([2, 1280, 1, 1536])];
-            tensor<bool, [4]> obj_37_end_mask_0 = const()[name = string("obj_37_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
-            tensor<fp16, [1, 1280, 1, 1536]> obj_37_cast_fp16 = slice_by_index(begin = obj_37_begin_0, end = obj_37_end_0, end_mask = obj_37_end_mask_0, x = read_state_3)[name = string("obj_37_cast_fp16")];
-            int32 var_363 = const()[name = string("op_363"), val = int32(3)];
-            tensor<int32, [1]> out_7_axes_0 = const()[name = string("out_7_axes_0"), val = tensor<int32, [1]>([1])];
-            fp16 var_388_to_fp16 = const()[name = string("op_388_to_fp16"), val = fp16(0x1.5p-17)];
-            tensor<fp16, [1, 1280, 1, 1]> out_7_cast_fp16 = layer_norm(axes = out_7_axes_0, epsilon = var_388_to_fp16, x = inputs_7_cast_fp16)[name = string("out_7_cast_fp16")];
-            tensor<fp16, [1280]> obj_25_gamma_0_to_fp16 = const()[name = string("obj_25_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(148926912)))];
-            tensor<fp16, [1280]> obj_25_beta_0_to_fp16 = const()[name = string("obj_25_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(148929536)))];
-            fp16 obj_25_epsilon_0_to_fp16 = const()[name = string("obj_25_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)];
-            tensor<fp16, [1, 1280, 1, 1]> obj_25_cast_fp16 = batch_norm(beta = obj_25_beta_0_to_fp16, epsilon = obj_25_epsilon_0_to_fp16, gamma = obj_25_gamma_0_to_fp16, mean = obj_5_mean_0_to_fp16, variance = obj_5_variance_0_to_fp16, x = out_7_cast_fp16)[name = string("obj_25_cast_fp16")];
-            string var_410_pad_type_0 = const()[name = string("op_410_pad_type_0"), val = string("valid")];
-            tensor<int32, [2]> var_410_strides_0 = const()[name = string("op_410_strides_0"), val = tensor<int32, [2]>([1, 1])];
-            tensor<int32, [4]> var_410_pad_0 = const()[name = string("op_410_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
-            tensor<int32, [2]> var_410_dilations_0 = const()[name = string("op_410_dilations_0"), val = tensor<int32, [2]>([1, 1])];
-            int32 var_410_groups_0 = const()[name = string("op_410_groups_0"), val = int32(1)];
-            tensor<fp16, [1280, 1280, 1, 1]> layers_1_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(148932160))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(149751424))))[name = string("layers_1_self_attn_q_proj_inlier_module_weight_to_fp16_palettized")];
-            tensor<fp16, [1280]> layers_1_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_1_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(149751552)))];
-            tensor<fp16, [1, 1280, 1, 1]> var_410_cast_fp16 = conv(bias = layers_1_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_410_dilations_0, groups = var_410_groups_0, pad = var_410_pad_0, pad_type = var_410_pad_type_0, strides = var_410_strides_0, weight = layers_1_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_25_cast_fp16)[name = string("op_410_cast_fp16")];
-            string var_416_pad_type_0 = const()[name = string("op_416_pad_type_0"), val = string("valid")];
-            tensor<int32, [2]> var_416_strides_0 = const()[name = string("op_416_strides_0"), val = tensor<int32, [2]>([1, 1])];
-            tensor<int32, [4]> var_416_pad_0 = const()[name = string("op_416_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
-            tensor<int32, [2]> var_416_dilations_0 = const()[name = string("op_416_dilations_0"), val = tensor<int32, [2]>([1, 1])];
-            int32 var_416_groups_0 = const()[name = string("op_416_groups_0"), val = int32(1)];
-            tensor<fp16, [1280, 1280, 1, 1]> layers_1_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(149814272))), nonzero_data = tensor<fp16, [29985]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(149754176))))[name = string("layers_1_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified")];
-            tensor<fp16, [1, 1280, 1, 1]> var_416_cast_fp16 = conv(dilations = var_416_dilations_0, groups = var_416_groups_0, pad = var_416_pad_0, pad_type = var_416_pad_type_0, strides = var_416_strides_0, weight = layers_1_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_25_cast_fp16)[name = string("op_416_cast_fp16")];
-            tensor<fp16, [1, 1280, 1, 1]> query_5_cast_fp16 = add(x = var_410_cast_fp16, y = var_416_cast_fp16)[name = string("query_5_cast_fp16")];
-            string var_425_pad_type_0 = const()[name = string("op_425_pad_type_0"), val = string("valid")];
-            tensor<int32, [2]> var_425_strides_0 = const()[name = string("op_425_strides_0"), val = tensor<int32, [2]>([1, 1])];
-            tensor<int32, [4]> var_425_pad_0 = const()[name = string("op_425_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
-            tensor<int32, [2]> var_425_dilations_0 = const()[name = string("op_425_dilations_0"), val = tensor<int32, [2]>([1, 1])];
-            int32 var_425_groups_0 = const()[name = string("op_425_groups_0"), val = int32(1)];
-            tensor<fp16, [1280, 1280, 1, 1]> layers_1_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(150019136))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(150838400))))[name = string("layers_1_self_attn_k_proj_inlier_module_weight_to_fp16_palettized")];
-            tensor<fp16, [1, 1280, 1, 1]> var_425_cast_fp16 = conv(dilations = var_425_dilations_0, groups = var_425_groups_0, pad = var_425_pad_0, pad_type = var_425_pad_type_0, strides = var_425_strides_0, weight = layers_1_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_25_cast_fp16)[name = string("op_425_cast_fp16")];
-            string var_431_pad_type_0 = const()[name = string("op_431_pad_type_0"), val = string("valid")];
-            tensor<int32, [2]> var_431_strides_0 = const()[name = string("op_431_strides_0"), val = tensor<int32, [2]>([1, 1])];
-            tensor<int32, [4]> var_431_pad_0 = const()[name = string("op_431_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
-            tensor<int32, [2]> var_431_dilations_0 = const()[name = string("op_431_dilations_0"), val = tensor<int32, [2]>([1, 1])];
-            int32 var_431_groups_0 = const()[name = string("op_431_groups_0"), val = int32(1)];
-            tensor<fp16, [1280, 1280, 1, 1]> layers_1_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(150885184))), nonzero_data = tensor<fp16, [23287]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(150838528))))[name = string("layers_1_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified")];
-            tensor<fp16, [1, 1280, 1, 1]> var_431_cast_fp16 = conv(dilations = var_431_dilations_0, groups = var_431_groups_0, pad = var_431_pad_0, pad_type = var_431_pad_type_0, strides = var_431_strides_0, weight = layers_1_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_25_cast_fp16)[name = string("op_431_cast_fp16")];
-            tensor<fp16, [1, 1280, 1, 1]> current_key_3_cast_fp16 = add(x = var_425_cast_fp16, y = var_431_cast_fp16)[name = string("current_key_3_cast_fp16")];
-            string var_441_pad_type_0 = const()[name = string("op_441_pad_type_0"), val = string("valid")];
-            tensor<int32, [2]> var_441_strides_0 = const()[name = string("op_441_strides_0"), val = tensor<int32, [2]>([1, 1])];
-            tensor<int32, [4]> var_441_pad_0 = const()[name = string("op_441_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
-            tensor<int32, [2]> var_441_dilations_0 = const()[name = string("op_441_dilations_0"), val = tensor<int32, [2]>([1, 1])];
-            int32 var_441_groups_0 = const()[name = string("op_441_groups_0"), val = int32(1)];
-            tensor<fp16, [1280, 1280, 1, 1]> layers_1_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(151090048))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(151909312))))[name = string("layers_1_self_attn_v_proj_inlier_module_weight_to_fp16_palettized")];
-            tensor<fp16, [1280]> layers_1_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_1_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(151909440)))];
-            tensor<fp16, [1, 1280, 1, 1]> var_441_cast_fp16 = conv(bias = layers_1_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_441_dilations_0, groups = var_441_groups_0, pad = var_441_pad_0, pad_type = var_441_pad_type_0, strides = var_441_strides_0, weight = layers_1_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_25_cast_fp16)[name = string("op_441_cast_fp16")];
-            string var_447_pad_type_0 = const()[name = string("op_447_pad_type_0"), val = string("valid")];
-            tensor<int32, [2]> var_447_strides_0 = const()[name = string("op_447_strides_0"), val = tensor<int32, [2]>([1, 1])];
-            tensor<int32, [4]> var_447_pad_0 = const()[name = string("op_447_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
-            tensor<int32, [2]> var_447_dilations_0 = const()[name = string("op_447_dilations_0"), val = tensor<int32, [2]>([1, 1])];
-            int32 var_447_groups_0 = const()[name = string("op_447_groups_0"), val = int32(1)];
-            tensor<fp16, [1280, 1280, 1, 1]> layers_1_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(151934720))), nonzero_data = tensor<fp16, [11267]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(151912064))))[name = string("layers_1_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified")];
-            tensor<fp16, [1, 1280, 1, 1]> var_447_cast_fp16 = conv(dilations = var_447_dilations_0, groups = var_447_groups_0, pad = var_447_pad_0, pad_type = var_447_pad_type_0, strides = var_447_strides_0, weight = layers_1_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_25_cast_fp16)[name = string("op_447_cast_fp16")];
-            tensor<fp16, [1, 1280, 1, 1]> current_value_3_cast_fp16 = add(x = var_441_cast_fp16, y = var_447_cast_fp16)[name = string("current_value_3_cast_fp16")];
-            tensor<fp16, [1, 1280, 1, 448]> var_453_cast_fp16 = mul(x = current_key_3_cast_fp16, y = var_170_cast_fp16)[name = string("op_453_cast_fp16")];
-            tensor<fp16, [1, 1280, 1, 448]> key_3_cast_fp16 = add(x = var_55_cast_fp16_1, y = var_453_cast_fp16)[name = string("key_3_cast_fp16")];
-            tensor<fp16, [1, 1280, 1, 448]> var_455_cast_fp16 = mul(x = current_value_3_cast_fp16, y = var_170_cast_fp16)[name = string("op_455_cast_fp16")];
-            tensor<fp16, [1, 1280, 1, 448]> value_3_cast_fp16 = add(x = var_62_cast_fp16_1, y = var_455_cast_fp16)[name = string("value_3_cast_fp16")];
-            tensor<int32, [4]> var_458 = const()[name = string("op_458"), val = tensor<int32, [4]>([1, 20, 64, -1])];
-            tensor<fp16, [1, 20, 64, 1]> mh_q_5_cast_fp16 = reshape(shape = var_458, x = query_5_cast_fp16)[name = string("mh_q_5_cast_fp16")];
-            fp16 var_460_to_fp16 = const()[name = string("op_460_to_fp16"), val = fp16(0x1p-3)];
-            tensor<fp16, [1, 20, 64, 1]> var_461_cast_fp16 = mul(x = mh_q_5_cast_fp16, y = var_460_to_fp16)[name = string("op_461_cast_fp16")];
-            tensor<int32, [4]> var_462 = const()[name = string("op_462"), val = tensor<int32, [4]>([1, 20, 64, -1])];
-            tensor<fp16, [1, 20, 64, 448]> var_463_cast_fp16 = reshape(shape = var_462, x = key_3_cast_fp16)[name = string("op_463_cast_fp16")];
-            bool mh_w_9_transpose_x_0 = const()[name = string("mh_w_9_transpose_x_0"), val = bool(true)];
-            bool mh_w_9_transpose_y_0 = const()[name = string("mh_w_9_transpose_y_0"), val = bool(false)];
-            tensor<fp16, [1, 20, 1, 448]> mh_w_9_cast_fp16 = matmul(transpose_x = mh_w_9_transpose_x_0, transpose_y = mh_w_9_transpose_y_0, x = var_461_cast_fp16, y = var_463_cast_fp16)[name = string("mh_w_9_cast_fp16")];
-            tensor<fp16, [1, 20, 1, 448]> mh_w_11_cast_fp16 = add(x = mh_w_9_cast_fp16, y = var_187_cast_fp16)[name = string("mh_w_11_cast_fp16")];
-            tensor<fp16, [1, 20, 1, 448]> var_471_cast_fp16 = softmax(axis = var_363, x = mh_w_11_cast_fp16)[name = string("op_471_cast_fp16")];
-            tensor<int32, [4]> var_472 = const()[name = string("op_472"), val = tensor<int32, [4]>([1, 20, 64, -1])];
-            tensor<fp16, [1, 20, 64, 448]> var_473_cast_fp16 = reshape(shape = var_472, x = value_3_cast_fp16)[name = string("op_473_cast_fp16")];
-            bool attn_5_transpose_x_0 = const()[name = string("attn_5_transpose_x_0"), val = bool(false)];
-            bool attn_5_transpose_y_0 = const()[name = string("attn_5_transpose_y_0"), val = bool(true)];
-            tensor<fp16, [1, 20, 64, 1]> attn_5_cast_fp16 = matmul(transpose_x = attn_5_transpose_x_0, transpose_y = attn_5_transpose_y_0, x = var_473_cast_fp16, y = var_471_cast_fp16)[name = string("attn_5_cast_fp16")];
-            tensor<int32, [4]> var_476 = const()[name = string("op_476"), val = tensor<int32, [4]>([1, 1280, 1, -1])];
-            tensor<fp16, [1, 1280, 1, 1]> input_11_cast_fp16 = reshape(shape = var_476, x = attn_5_cast_fp16)[name = string("input_11_cast_fp16")];
-            string var_486_pad_type_0 = const()[name = string("op_486_pad_type_0"), val = string("valid")];
-            tensor<int32, [2]> var_486_strides_0 = const()[name = string("op_486_strides_0"), val = tensor<int32, [2]>([1, 1])];
-            tensor<int32, [4]> var_486_pad_0 = const()[name = string("op_486_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
-            tensor<int32, [2]> var_486_dilations_0 = const()[name = string("op_486_dilations_0"), val = tensor<int32, [2]>([1, 1])];
-            int32 var_486_groups_0 = const()[name = string("op_486_groups_0"), val = int32(1)];
-            tensor<fp16, [1280, 1280, 1, 1]> layers_1_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(152139584))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(152958848))))[name = string("layers_1_self_attn_o_proj_inlier_module_weight_to_fp16_palettized")];
-            tensor<fp16, [1280]> layers_1_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_1_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(152958976)))];
-            tensor<fp16, [1, 1280, 1, 1]> var_486_cast_fp16 = conv(bias = layers_1_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_486_dilations_0, groups = var_486_groups_0, pad = var_486_pad_0, pad_type = var_486_pad_type_0, strides = var_486_strides_0, weight = layers_1_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_11_cast_fp16)[name = string("op_486_cast_fp16")];
-            string var_492_pad_type_0 = const()[name = string("op_492_pad_type_0"), val = string("valid")];
-            tensor<int32, [2]> var_492_strides_0 = const()[name = string("op_492_strides_0"), val = tensor<int32, [2]>([1, 1])];
-            tensor<int32, [4]> var_492_pad_0 = const()[name = string("op_492_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
-            tensor<int32, [2]> var_492_dilations_0 = const()[name = string("op_492_dilations_0"), val = tensor<int32, [2]>([1, 1])];
-            int32 var_492_groups_0 = const()[name = string("op_492_groups_0"), val = int32(1)];
-            tensor<fp16, [1280, 1280, 1, 1]> layers_1_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(152986048))), nonzero_data = tensor<fp16, [12187]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(152961600))))[name = string("layers_1_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified")];
-            tensor<fp16, [1, 1280, 1, 1]> var_492_cast_fp16 = conv(dilations = var_492_dilations_0, groups = var_492_groups_0, pad = var_492_pad_0, pad_type = var_492_pad_type_0, strides = var_492_strides_0, weight = layers_1_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_11_cast_fp16)[name = string("op_492_cast_fp16")];
-            tensor<fp16, [1, 1280, 1, 1]> obj_31_cast_fp16 = add(x = var_486_cast_fp16, y = var_492_cast_fp16)[name = string("obj_31_cast_fp16")];
-            tensor<fp16, [1, 1280, 1, 1]> inputs_9_cast_fp16 = add(x = inputs_7_cast_fp16, y = obj_31_cast_fp16)[name = string("inputs_9_cast_fp16")];
-            tensor<int32, [1]> out_9_axes_0 = const()[name = string("out_9_axes_0"), val = tensor<int32, [1]>([1])];
-            fp16 var_507_to_fp16 = const()[name = string("op_507_to_fp16"), val = fp16(0x1.5p-17)];
-            tensor<fp16, [1, 1280, 1, 1]> out_9_cast_fp16 = layer_norm(axes = out_9_axes_0, epsilon = var_507_to_fp16, x = inputs_9_cast_fp16)[name = string("out_9_cast_fp16")];
-            tensor<fp16, [1280]> obj_33_gamma_0_to_fp16 = const()[name = string("obj_33_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(153190912)))];
-            tensor<fp16, [1280]> obj_33_beta_0_to_fp16 = const()[name = string("obj_33_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(153193536)))];
-            fp16 obj_33_epsilon_0_to_fp16 = const()[name = string("obj_33_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)];
-            tensor<fp16, [1, 1280, 1, 1]> obj_33_cast_fp16 = batch_norm(beta = obj_33_beta_0_to_fp16, epsilon = obj_33_epsilon_0_to_fp16, gamma = obj_33_gamma_0_to_fp16, mean = obj_5_mean_0_to_fp16, variance = obj_5_variance_0_to_fp16, x = out_9_cast_fp16)[name = string("obj_33_cast_fp16")];
-            string var_527_pad_type_0 = const()[name = string("op_527_pad_type_0"), val = string("valid")];
-            tensor<int32, [2]> var_527_strides_0 = const()[name = string("op_527_strides_0"), val = tensor<int32, [2]>([1, 1])];
-            tensor<int32, [4]> var_527_pad_0 = const()[name = string("op_527_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
-            tensor<int32, [2]> var_527_dilations_0 = const()[name = string("op_527_dilations_0"), val = tensor<int32, [2]>([1, 1])];
-            int32 var_527_groups_0 = const()[name = string("op_527_groups_0"), val = int32(1)];
-            tensor<fp16, [1280, 1280, 1, 1]> layers_1_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(153196160))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(154015424))))[name = string("layers_1_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized")];
-            tensor<fp16, [1280]> layers_1_encoder_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_1_encoder_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(154015552)))];
-            tensor<fp16, [1, 1280, 1, 1]> var_527_cast_fp16 = conv(bias = layers_1_encoder_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_527_dilations_0, groups = var_527_groups_0, pad = var_527_pad_0, pad_type = var_527_pad_type_0, strides = var_527_strides_0, weight = layers_1_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_33_cast_fp16)[name = string("op_527_cast_fp16")];
-            string var_533_pad_type_0 = const()[name = string("op_533_pad_type_0"), val = string("valid")];
-            tensor<int32, [2]> var_533_strides_0 = const()[name = string("op_533_strides_0"), val = tensor<int32, [2]>([1, 1])];
-            tensor<int32, [4]> var_533_pad_0 = const()[name = string("op_533_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
-            tensor<int32, [2]> var_533_dilations_0 = const()[name = string("op_533_dilations_0"), val = tensor<int32, [2]>([1, 1])];
-            int32 var_533_groups_0 = const()[name = string("op_533_groups_0"), val = int32(1)];
-            tensor<fp16, [1280, 1280, 1, 1]> layers_1_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(154061248))), nonzero_data = tensor<fp16, [21483]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(154018176))))[name = string("layers_1_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified")];
-            tensor<fp16, [1, 1280, 1, 1]> var_533_cast_fp16 = conv(dilations = var_533_dilations_0, groups = var_533_groups_0, pad = var_533_pad_0, pad_type = var_533_pad_type_0, strides = var_533_strides_0, weight = layers_1_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_33_cast_fp16)[name = string("op_533_cast_fp16")];
-            tensor<fp16, [1, 1280, 1, 1]> query_7_cast_fp16 = add(x = var_527_cast_fp16, y = var_533_cast_fp16)[name = string("query_7_cast_fp16")];
-            tensor<int32, [4]> var_536 = const()[name = string("op_536"), val = tensor<int32, [4]>([1, 20, 64, -1])];
-            tensor<fp16, [1, 20, 64, 1]> mh_q_7_cast_fp16 = reshape(shape = var_536, x = query_7_cast_fp16)[name = string("mh_q_7_cast_fp16")];
-            fp16 var_538_to_fp16 = const()[name = string("op_538_to_fp16"), val = fp16(0x1p-3)];
-            tensor<fp16, [1, 20, 64, 1]> var_539_cast_fp16 = mul(x = mh_q_7_cast_fp16, y = var_538_to_fp16)[name = string("op_539_cast_fp16")];
-            tensor<int32, [4]> var_540 = const()[name = string("op_540"), val = tensor<int32, [4]>([1, 20, 64, -1])];
-            tensor<fp16, [1, 20, 64, 1536]> var_541_cast_fp16 = reshape(shape = var_540, x = obj_35_cast_fp16)[name = string("op_541_cast_fp16")];
-            bool mh_w_13_transpose_x_0 = const()[name = string("mh_w_13_transpose_x_0"), val = bool(true)];
-            bool mh_w_13_transpose_y_0 = const()[name = string("mh_w_13_transpose_y_0"), val = bool(false)];
-            tensor<fp16, [1, 20, 1, 1536]> mh_w_13_cast_fp16 = matmul(transpose_x = mh_w_13_transpose_x_0, transpose_y = mh_w_13_transpose_y_0, x = var_539_cast_fp16, y = var_541_cast_fp16)[name = string("mh_w_13_cast_fp16")];
-            tensor<fp16, [1, 20, 1, 1536]> mh_w_15_cast_fp16 = add(x = mh_w_13_cast_fp16, y = var_265_cast_fp16)[name = string("mh_w_15_cast_fp16")];
-            tensor<fp16, [1, 20, 1, 1536]> obj_41_cast_fp16 = softmax(axis = var_363, x = mh_w_15_cast_fp16)[name = string("obj_41_cast_fp16")];
-            tensor<int32, [4]> var_550 = const()[name = string("op_550"), val = tensor<int32, [4]>([1, 20, 64, -1])];
-            tensor<fp16, [1, 20, 64, 1536]> var_551_cast_fp16 = reshape(shape = var_550, x = obj_37_cast_fp16)[name = string("op_551_cast_fp16")];
-            bool attn_7_transpose_x_0 = const()[name = string("attn_7_transpose_x_0"), val = bool(false)];
-            bool attn_7_transpose_y_0 = const()[name = string("attn_7_transpose_y_0"), val = bool(true)];
-            tensor<fp16, [1, 20, 64, 1]> attn_7_cast_fp16 = matmul(transpose_x = attn_7_transpose_x_0, transpose_y = attn_7_transpose_y_0, x = var_551_cast_fp16, y = obj_41_cast_fp16)[name = string("attn_7_cast_fp16")];
-            tensor<int32, [4]> var_554 = const()[name = string("op_554"), val = tensor<int32, [4]>([1, 1280, 1, -1])];
-            tensor<fp16, [1, 1280, 1, 1]> input_13_cast_fp16 = reshape(shape = var_554, x = attn_7_cast_fp16)[name = string("input_13_cast_fp16")];
-            string var_564_pad_type_0 = const()[name = string("op_564_pad_type_0"), val = string("valid")];
-            tensor<int32, [2]> var_564_strides_0 = const()[name = string("op_564_strides_0"), val = tensor<int32, [2]>([1, 1])];
-            tensor<int32, [4]> var_564_pad_0 = const()[name = string("op_564_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
-            tensor<int32, [2]> var_564_dilations_0 = const()[name = string("op_564_dilations_0"), val = tensor<int32, [2]>([1, 1])];
-            int32 var_564_groups_0 = const()[name = string("op_564_groups_0"), val = int32(1)];
-            tensor<fp16, [1280, 1280, 1, 1]> layers_1_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(154266112))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(155085376))))[name = string("layers_1_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized")];
-            tensor<fp16, [1280]> layers_1_encoder_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_1_encoder_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(155085504)))];
-            tensor<fp16, [1, 1280, 1, 1]> var_564_cast_fp16 = conv(bias = layers_1_encoder_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_564_dilations_0, groups = var_564_groups_0, pad = var_564_pad_0, pad_type = var_564_pad_type_0, strides = var_564_strides_0, weight = layers_1_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_13_cast_fp16)[name = string("op_564_cast_fp16")];
-            string var_570_pad_type_0 = const()[name = string("op_570_pad_type_0"), val = string("valid")];
-            tensor<int32, [2]> var_570_strides_0 = const()[name = string("op_570_strides_0"), val = tensor<int32, [2]>([1, 1])];
-            tensor<int32, [4]> var_570_pad_0 = const()[name = string("op_570_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
-            tensor<int32, [2]> var_570_dilations_0 = const()[name = string("op_570_dilations_0"), val = tensor<int32, [2]>([1, 1])];
-            int32 var_570_groups_0 = const()[name = string("op_570_groups_0"), val = int32(1)];
-            tensor<fp16, [1280, 1280, 1, 1]> layers_1_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(155098496))), nonzero_data = tensor<fp16, [5143]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(155088128))))[name = string("layers_1_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified")];
-            tensor<fp16, [1, 1280, 1, 1]> var_570_cast_fp16 = conv(dilations = var_570_dilations_0, groups = var_570_groups_0, pad = var_570_pad_0, pad_type = var_570_pad_type_0, strides = var_570_strides_0, weight = layers_1_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_13_cast_fp16)[name = string("op_570_cast_fp16")];
-            tensor<fp16, [1, 1280, 1, 1]> obj_39_cast_fp16 = add(x = var_564_cast_fp16, y = var_570_cast_fp16)[name = string("obj_39_cast_fp16")];
-            tensor<fp16, [1, 1280, 1, 1]> inputs_11_cast_fp16 = add(x = inputs_9_cast_fp16, y = obj_39_cast_fp16)[name = string("inputs_11_cast_fp16")];
-            tensor<int32, [1]> out_11_axes_0 = const()[name = string("out_11_axes_0"), val = tensor<int32, [1]>([1])];
-            fp16 var_581_to_fp16 = const()[name = string("op_581_to_fp16"), val = fp16(0x1.5p-17)];
-            tensor<fp16, [1, 1280, 1, 1]> out_11_cast_fp16 = layer_norm(axes = out_11_axes_0, epsilon = var_581_to_fp16, x = inputs_11_cast_fp16)[name = string("out_11_cast_fp16")];
-            tensor<fp16, [1280]> input_15_gamma_0_to_fp16 = const()[name = string("input_15_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(155303360)))];
-            tensor<fp16, [1280]> input_15_beta_0_to_fp16 = const()[name = string("input_15_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(155305984)))];
-            fp16 input_15_epsilon_0_to_fp16 = const()[name = string("input_15_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)];
-            tensor<fp16, [1, 1280, 1, 1]> input_15_cast_fp16 = batch_norm(beta = input_15_beta_0_to_fp16, epsilon = input_15_epsilon_0_to_fp16, gamma = input_15_gamma_0_to_fp16, mean = obj_5_mean_0_to_fp16, variance = obj_5_variance_0_to_fp16, x = out_11_cast_fp16)[name = string("input_15_cast_fp16")];
-            string var_599_pad_type_0 = const()[name = string("op_599_pad_type_0"), val = string("valid")];
-            tensor<int32, [2]> var_599_strides_0 = const()[name = string("op_599_strides_0"), val = tensor<int32, [2]>([1, 1])];
-            tensor<int32, [4]> var_599_pad_0 = const()[name = string("op_599_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
-            tensor<int32, [2]> var_599_dilations_0 = const()[name = string("op_599_dilations_0"), val = tensor<int32, [2]>([1, 1])];
-            int32 var_599_groups_0 = const()[name = string("op_599_groups_0"), val = int32(1)];
-            tensor<fp16, [5120, 1280, 1, 1]> layers_1_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [5120, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(155308608))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(158585472))))[name = string("layers_1_fc1_inlier_module_weight_to_fp16_palettized")];
-            tensor<fp16, [5120]> layers_1_fc1_inlier_module_bias_to_fp16 = const()[name = string("layers_1_fc1_inlier_module_bias_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(158585600)))];
-            tensor<fp16, [1, 5120, 1, 1]> var_599_cast_fp16 = conv(bias = layers_1_fc1_inlier_module_bias_to_fp16, dilations = var_599_dilations_0, groups = var_599_groups_0, pad = var_599_pad_0, pad_type = var_599_pad_type_0, strides = var_599_strides_0, weight = layers_1_fc1_inlier_module_weight_to_fp16_palettized, x = input_15_cast_fp16)[name = string("op_599_cast_fp16")];
-            string var_605_pad_type_0 = const()[name = string("op_605_pad_type_0"), val = string("valid")];
-            tensor<int32, [2]> var_605_strides_0 = const()[name = string("op_605_strides_0"), val = tensor<int32, [2]>([1, 1])];
-            tensor<int32, [4]> var_605_pad_0 = const()[name = string("op_605_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
-            tensor<int32, [2]> var_605_dilations_0 = const()[name = string("op_605_dilations_0"), val = tensor<int32, [2]>([1, 1])];
-            int32 var_605_groups_0 = const()[name = string("op_605_groups_0"), val = int32(1)];
-            tensor<fp16, [5120, 1280, 1, 1]> layers_1_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [5120, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(158681152))), nonzero_data = tensor<fp16, [42562]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(158595904))))[name = string("layers_1_fc1_outlier_module_weight_to_fp16_sparsified")];
-            tensor<fp16, [1, 5120, 1, 1]> var_605_cast_fp16 = conv(dilations = var_605_dilations_0, groups = var_605_groups_0, pad = var_605_pad_0, pad_type = var_605_pad_type_0, strides = var_605_strides_0, weight = layers_1_fc1_outlier_module_weight_to_fp16_sparsified, x = input_15_cast_fp16)[name = string("op_605_cast_fp16")];
-            tensor<fp16, [1, 5120, 1, 1]> input_17_cast_fp16 = add(x = var_599_cast_fp16, y = var_605_cast_fp16)[name = string("input_17_cast_fp16")];
-            string input_19_mode_0 = const()[name = string("input_19_mode_0"), val = string("EXACT")];
-            tensor<fp16, [1, 5120, 1, 1]> input_19_cast_fp16 = gelu(mode = input_19_mode_0, x = input_17_cast_fp16)[name = string("input_19_cast_fp16")];
-            string var_616_pad_type_0 = const()[name = string("op_616_pad_type_0"), val = string("valid")];
-            tensor<int32, [2]> var_616_strides_0 = const()[name = string("op_616_strides_0"), val = tensor<int32, [2]>([1, 1])];
-            tensor<int32, [4]> var_616_pad_0 = const()[name = string("op_616_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
-            tensor<int32, [2]> var_616_dilations_0 = const()[name = string("op_616_dilations_0"), val = tensor<int32, [2]>([1, 1])];
-            int32 var_616_groups_0 = const()[name = string("op_616_groups_0"), val = int32(1)];
-            tensor<fp16, [1280, 5120, 1, 1]> layers_1_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1280, 5120, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(159500416))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(162777280))))[name = string("layers_1_fc2_inlier_module_weight_to_fp16_palettized")];
-            tensor<fp16, [1280]> layers_1_fc2_inlier_module_bias_to_fp16 = const()[name = string("layers_1_fc2_inlier_module_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(162777408)))];
-            tensor<fp16, [1, 1280, 1, 1]> var_616_cast_fp16 = conv(bias = layers_1_fc2_inlier_module_bias_to_fp16, dilations = var_616_dilations_0, groups = var_616_groups_0, pad = var_616_pad_0, pad_type = var_616_pad_type_0, strides = var_616_strides_0, weight = layers_1_fc2_inlier_module_weight_to_fp16_palettized, x = input_19_cast_fp16)[name = string("op_616_cast_fp16")];
-            string var_622_pad_type_0 = const()[name = string("op_622_pad_type_0"), val = string("valid")];
-            tensor<int32, [2]> var_622_strides_0 = const()[name = string("op_622_strides_0"), val = tensor<int32, [2]>([1, 1])];
-            tensor<int32, [4]> var_622_pad_0 = const()[name = string("op_622_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
-            tensor<int32, [2]> var_622_dilations_0 = const()[name = string("op_622_dilations_0"), val = tensor<int32, [2]>([1, 1])];
-            int32 var_622_groups_0 = const()[name = string("op_622_groups_0"), val = int32(1)];
-            tensor<fp16, [1280, 5120, 1, 1]> layers_1_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [1280, 5120, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(162868032))), nonzero_data = tensor<fp16, [43939]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(162780032))))[name = string("layers_1_fc2_outlier_module_weight_to_fp16_sparsified")];
-            tensor<fp16, [1, 1280, 1, 1]> var_622_cast_fp16 = conv(dilations = var_622_dilations_0, groups = var_622_groups_0, pad = var_622_pad_0, pad_type = var_622_pad_type_0, strides = var_622_strides_0, weight = layers_1_fc2_outlier_module_weight_to_fp16_sparsified, x = input_19_cast_fp16)[name = string("op_622_cast_fp16")];
-            tensor<fp16, [1, 1280, 1, 1]> hidden_states_5_cast_fp16 = add(x = var_616_cast_fp16, y = var_622_cast_fp16)[name = string("hidden_states_5_cast_fp16")];
-            tensor<fp16, [1, 1280, 1, 1]> inputs_13_cast_fp16 = add(x = inputs_11_cast_fp16, y = hidden_states_5_cast_fp16)[name = string("inputs_13_cast_fp16")];
-            tensor<int32, [4]> obj_53_begin_0 = const()[name = string("obj_53_begin_0"), val = tensor<int32, [4]>([2, 0, 0, 0])];
-            tensor<int32, [4]> obj_53_end_0 = const()[name = string("obj_53_end_0"), val = tensor<int32, [4]>([3, 1280, 1, 1536])];
-            tensor<bool, [4]> obj_53_end_mask_0 = const()[name = string("obj_53_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
-            tensor<fp16, [1, 1280, 1, 1536]> obj_53_cast_fp16 = slice_by_index(begin = obj_53_begin_0, end = obj_53_end_0, end_mask = obj_53_end_mask_0, x = read_state_2)[name = string("obj_53_cast_fp16")];
-            tensor<int32, [4]> obj_55_begin_0 = const()[name = string("obj_55_begin_0"), val = tensor<int32, [4]>([2, 0, 0, 0])];
-            tensor<int32, [4]> obj_55_end_0 = const()[name = string("obj_55_end_0"), val = tensor<int32, [4]>([3, 1280, 1, 1536])];
-            tensor<bool, [4]> obj_55_end_mask_0 = const()[name = string("obj_55_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
-            tensor<fp16, [1, 1280, 1, 1536]> obj_55_cast_fp16 = slice_by_index(begin = obj_55_begin_0, end = obj_55_end_0, end_mask = obj_55_end_mask_0, x = read_state_3)[name = string("obj_55_cast_fp16")];
-            int32 var_644 = const()[name = string("op_644"), val = int32(3)];
-            tensor<int32, [1]> out_13_axes_0 = const()[name = string("out_13_axes_0"), val = tensor<int32, [1]>([1])];
-            fp16 var_669_to_fp16 = const()[name = string("op_669_to_fp16"), val = fp16(0x1.5p-17)];
-            tensor<fp16, [1, 1280, 1, 1]> out_13_cast_fp16 = layer_norm(axes = out_13_axes_0, epsilon = var_669_to_fp16, x = inputs_13_cast_fp16)[name = string("out_13_cast_fp16")];
-            tensor<fp16, [1280]> obj_43_gamma_0_to_fp16 = const()[name = string("obj_43_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(163687296)))];
-            tensor<fp16, [1280]> obj_43_beta_0_to_fp16 = const()[name = string("obj_43_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(163689920)))];
-            fp16 obj_43_epsilon_0_to_fp16 = const()[name = string("obj_43_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)];
-            tensor<fp16, [1, 1280, 1, 1]> obj_43_cast_fp16 = batch_norm(beta = obj_43_beta_0_to_fp16, epsilon = obj_43_epsilon_0_to_fp16, gamma = obj_43_gamma_0_to_fp16, mean = obj_5_mean_0_to_fp16, variance = obj_5_variance_0_to_fp16, x = out_13_cast_fp16)[name = string("obj_43_cast_fp16")];
-            string var_691_pad_type_0 = const()[name = string("op_691_pad_type_0"), val = string("valid")];
-            tensor<int32, [2]> var_691_strides_0 = const()[name = string("op_691_strides_0"), val = tensor<int32, [2]>([1, 1])];
-            tensor<int32, [4]> var_691_pad_0 = const()[name = string("op_691_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
-            tensor<int32, [2]> var_691_dilations_0 = const()[name = string("op_691_dilations_0"), val = tensor<int32, [2]>([1, 1])];
-            int32 var_691_groups_0 = const()[name = string("op_691_groups_0"), val = int32(1)];
-            tensor<fp16, [1280, 1280, 1, 1]> layers_2_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(163692544))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(164511808))))[name = string("layers_2_self_attn_q_proj_inlier_module_weight_to_fp16_palettized")];
-            tensor<fp16, [1280]> layers_2_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_2_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(164511936)))];
-            tensor<fp16, [1, 1280, 1, 1]> var_691_cast_fp16 = conv(bias = layers_2_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_691_dilations_0, groups = var_691_groups_0, pad = var_691_pad_0, pad_type = var_691_pad_type_0, strides = var_691_strides_0, weight = layers_2_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_43_cast_fp16)[name = string("op_691_cast_fp16")];
-            string var_697_pad_type_0 = const()[name = string("op_697_pad_type_0"), val = string("valid")];
-            tensor<int32, [2]> var_697_strides_0 = const()[name = string("op_697_strides_0"), val = tensor<int32, [2]>([1, 1])];
-            tensor<int32, [4]> var_697_pad_0 = const()[name = string("op_697_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
-            tensor<int32, [2]> var_697_dilations_0 = const()[name = string("op_697_dilations_0"), val = tensor<int32, [2]>([1, 1])];
-            int32 var_697_groups_0 = const()[name = string("op_697_groups_0"), val = int32(1)];
-            tensor<fp16, [1280, 1280, 1, 1]> layers_2_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(164546816))), nonzero_data = tensor<fp16, [16094]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(164514560))))[name = string("layers_2_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified")];
-            tensor<fp16, [1, 1280, 1, 1]> var_697_cast_fp16 = conv(dilations = var_697_dilations_0, groups = var_697_groups_0, pad = var_697_pad_0, pad_type = var_697_pad_type_0, strides = var_697_strides_0, weight = layers_2_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_43_cast_fp16)[name = string("op_697_cast_fp16")];
-            tensor<fp16, [1, 1280, 1, 1]> query_9_cast_fp16 = add(x = var_691_cast_fp16, y = var_697_cast_fp16)[name = string("query_9_cast_fp16")];
-            string var_706_pad_type_0 = const()[name = string("op_706_pad_type_0"), val = string("valid")];
-            tensor<int32, [2]> var_706_strides_0 = const()[name = string("op_706_strides_0"), val = tensor<int32, [2]>([1, 1])];
-            tensor<int32, [4]> var_706_pad_0 = const()[name = string("op_706_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
-            tensor<int32, [2]> var_706_dilations_0 = const()[name = string("op_706_dilations_0"), val = tensor<int32, [2]>([1, 1])];
-            int32 var_706_groups_0 = const()[name = string("op_706_groups_0"), val = int32(1)];
-            tensor<fp16, [1280, 1280, 1, 1]> layers_2_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(164751680))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(165570944))))[name = string("layers_2_self_attn_k_proj_inlier_module_weight_to_fp16_palettized")];
-            tensor<fp16, [1, 1280, 1, 1]> var_706_cast_fp16 = conv(dilations = var_706_dilations_0, groups = var_706_groups_0, pad = var_706_pad_0, pad_type = var_706_pad_type_0, strides = var_706_strides_0, weight = layers_2_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_43_cast_fp16)[name = string("op_706_cast_fp16")];
-            string var_712_pad_type_0 = const()[name = string("op_712_pad_type_0"), val = string("valid")];
-            tensor<int32, [2]> var_712_strides_0 = const()[name = string("op_712_strides_0"), val = tensor<int32, [2]>([1, 1])];
-            tensor<int32, [4]> var_712_pad_0 = const()[name = string("op_712_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
-            tensor<int32, [2]> var_712_dilations_0 = const()[name = string("op_712_dilations_0"), val = tensor<int32, [2]>([1, 1])];
-            int32 var_712_groups_0 = const()[name = string("op_712_groups_0"), val = int32(1)];
-            tensor<fp16, [1280, 1280, 1, 1]> layers_2_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(165608576))), nonzero_data = tensor<fp16, [18690]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(165571072))))[name = string("layers_2_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified")];
-            tensor<fp16, [1, 1280, 1, 1]> var_712_cast_fp16 = conv(dilations = var_712_dilations_0, groups = var_712_groups_0, pad = var_712_pad_0, pad_type = var_712_pad_type_0, strides = var_712_strides_0, weight = layers_2_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_43_cast_fp16)[name = string("op_712_cast_fp16")];
-            tensor<fp16, [1, 1280, 1, 1]> current_key_5_cast_fp16 = add(x = var_706_cast_fp16, y = var_712_cast_fp16)[name = string("current_key_5_cast_fp16")];
-            string var_722_pad_type_0 = const()[name = string("op_722_pad_type_0"), val = string("valid")];
-            tensor<int32, [2]> var_722_strides_0 = const()[name = string("op_722_strides_0"), val = tensor<int32, [2]>([1, 1])];
-            tensor<int32, [4]> var_722_pad_0 = const()[name = string("op_722_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
-            tensor<int32, [2]> var_722_dilations_0 = const()[name = string("op_722_dilations_0"), val = tensor<int32, [2]>([1, 1])];
-            int32 var_722_groups_0 = const()[name = string("op_722_groups_0"), val = int32(1)];
-            tensor<fp16, [1280, 1280, 1, 1]> layers_2_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(165813440))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(166632704))))[name = string("layers_2_self_attn_v_proj_inlier_module_weight_to_fp16_palettized")];
-            tensor<fp16, [1280]> layers_2_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_2_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(166632832)))];
-            tensor<fp16, [1, 1280, 1, 1]> var_722_cast_fp16 = conv(bias = layers_2_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_722_dilations_0, groups = var_722_groups_0, pad = var_722_pad_0, pad_type = var_722_pad_type_0, strides = var_722_strides_0, weight = layers_2_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_43_cast_fp16)[name = string("op_722_cast_fp16")];
-            string var_728_pad_type_0 = const()[name = string("op_728_pad_type_0"), val = string("valid")];
-            tensor<int32, [2]> var_728_strides_0 = const()[name = string("op_728_strides_0"), val = tensor<int32, [2]>([1, 1])];
-            tensor<int32, [4]> var_728_pad_0 = const()[name = string("op_728_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
-            tensor<int32, [2]> var_728_dilations_0 = const()[name = string("op_728_dilations_0"), val = tensor<int32, [2]>([1, 1])];
-            int32 var_728_groups_0 = const()[name = string("op_728_groups_0"), val = int32(1)];
-            tensor<fp16, [1280, 1280, 1, 1]> layers_2_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(166648384))), nonzero_data = tensor<fp16, [6431]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(166635456))))[name = string("layers_2_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified")];
-            tensor<fp16, [1, 1280, 1, 1]> var_728_cast_fp16 = conv(dilations = var_728_dilations_0, groups = var_728_groups_0, pad = var_728_pad_0, pad_type = var_728_pad_type_0, strides = var_728_strides_0, weight = layers_2_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_43_cast_fp16)[name = string("op_728_cast_fp16")];
-            tensor<fp16, [1, 1280, 1, 1]> current_value_5_cast_fp16 = add(x = var_722_cast_fp16, y = var_728_cast_fp16)[name = string("current_value_5_cast_fp16")];
-            tensor<fp16, [1, 1280, 1, 448]> var_734_cast_fp16 = mul(x = current_key_5_cast_fp16, y = var_170_cast_fp16)[name = string("op_734_cast_fp16")];
-            tensor<fp16, [1, 1280, 1, 448]> key_5_cast_fp16 = add(x = var_55_cast_fp16_2, y = var_734_cast_fp16)[name = string("key_5_cast_fp16")];
-            tensor<fp16, [1, 1280, 1, 448]> var_736_cast_fp16 = mul(x = current_value_5_cast_fp16, y = var_170_cast_fp16)[name = string("op_736_cast_fp16")];
-            tensor<fp16, [1, 1280, 1, 448]> value_5_cast_fp16 = add(x = var_62_cast_fp16_2, y = var_736_cast_fp16)[name = string("value_5_cast_fp16")];
-            tensor<int32, [4]> var_739 = const()[name = string("op_739"), val = tensor<int32, [4]>([1, 20, 64, -1])];
-            tensor<fp16, [1, 20, 64, 1]> mh_q_9_cast_fp16 = reshape(shape = var_739, x = query_9_cast_fp16)[name = string("mh_q_9_cast_fp16")];
-            fp16 var_741_to_fp16 = const()[name = string("op_741_to_fp16"), val = fp16(0x1p-3)];
-            tensor<fp16, [1, 20, 64, 1]> var_742_cast_fp16 = mul(x = mh_q_9_cast_fp16, y = var_741_to_fp16)[name = string("op_742_cast_fp16")];
-            tensor<int32, [4]> var_743 = const()[name = string("op_743"), val = tensor<int32, [4]>([1, 20, 64, -1])];
-            tensor<fp16, [1, 20, 64, 448]> var_744_cast_fp16 = reshape(shape = var_743, x = key_5_cast_fp16)[name = string("op_744_cast_fp16")];
-            bool mh_w_17_transpose_x_0 = const()[name = string("mh_w_17_transpose_x_0"), val = bool(true)];
-            bool mh_w_17_transpose_y_0 = const()[name = string("mh_w_17_transpose_y_0"), val = bool(false)];
-            tensor<fp16, [1, 20, 1, 448]> mh_w_17_cast_fp16 = matmul(transpose_x = mh_w_17_transpose_x_0, transpose_y = mh_w_17_transpose_y_0, x = var_742_cast_fp16, y = var_744_cast_fp16)[name = string("mh_w_17_cast_fp16")];
-            tensor<fp16, [1, 20, 1, 448]> mh_w_19_cast_fp16 = add(x = mh_w_17_cast_fp16, y = var_187_cast_fp16)[name = string("mh_w_19_cast_fp16")];
-            tensor<fp16, [1, 20, 1, 448]> var_752_cast_fp16 = softmax(axis = var_644, x = mh_w_19_cast_fp16)[name = string("op_752_cast_fp16")];
-            tensor<int32, [4]> var_753 = const()[name = string("op_753"), val = tensor<int32, [4]>([1, 20, 64, -1])];
-            tensor<fp16, [1, 20, 64, 448]> var_754_cast_fp16 = reshape(shape = var_753, x = value_5_cast_fp16)[name = string("op_754_cast_fp16")];
-            bool attn_9_transpose_x_0 = const()[name = string("attn_9_transpose_x_0"), val = bool(false)];
-            bool attn_9_transpose_y_0 = const()[name = string("attn_9_transpose_y_0"), val = bool(true)];
-            tensor<fp16, [1, 20, 64, 1]> attn_9_cast_fp16 = matmul(transpose_x = attn_9_transpose_x_0, transpose_y = attn_9_transpose_y_0, x = var_754_cast_fp16, y = var_752_cast_fp16)[name = string("attn_9_cast_fp16")];
-            tensor<int32, [4]> var_757 = const()[name = string("op_757"), val = tensor<int32, [4]>([1, 1280, 1, -1])];
-            tensor<fp16, [1, 1280, 1, 1]> input_21_cast_fp16 = reshape(shape = var_757, x = attn_9_cast_fp16)[name = string("input_21_cast_fp16")];
-            string var_767_pad_type_0 = const()[name = string("op_767_pad_type_0"), val = string("valid")];
-            tensor<int32, [2]> var_767_strides_0 = const()[name = string("op_767_strides_0"), val = tensor<int32, [2]>([1, 1])];
-            tensor<int32, [4]> var_767_pad_0 = const()[name = string("op_767_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
-            tensor<int32, [2]> var_767_dilations_0 = const()[name = string("op_767_dilations_0"), val = tensor<int32, [2]>([1, 1])];
-            int32 var_767_groups_0 = const()[name = string("op_767_groups_0"), val = int32(1)];
-            tensor<fp16, [1280, 1280, 1, 1]> layers_2_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(166853248))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(167672512))))[name = string("layers_2_self_attn_o_proj_inlier_module_weight_to_fp16_palettized")];
-            tensor<fp16, [1280]> layers_2_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_2_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(167672640)))];
-            tensor<fp16, [1, 1280, 1, 1]> var_767_cast_fp16 = conv(bias = layers_2_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_767_dilations_0, groups = var_767_groups_0, pad = var_767_pad_0, pad_type = var_767_pad_type_0, strides = var_767_strides_0, weight = layers_2_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_21_cast_fp16)[name = string("op_767_cast_fp16")];
-            string var_773_pad_type_0 = const()[name = string("op_773_pad_type_0"), val = string("valid")];
-            tensor<int32, [2]> var_773_strides_0 = const()[name = string("op_773_strides_0"), val = tensor<int32, [2]>([1, 1])];
-            tensor<int32, [4]> var_773_pad_0 = const()[name = string("op_773_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
-            tensor<int32, [2]> var_773_dilations_0 = const()[name = string("op_773_dilations_0"), val = tensor<int32, [2]>([1, 1])];
-            int32 var_773_groups_0 = const()[name = string("op_773_groups_0"), val = int32(1)];
-            tensor<fp16, [1280, 1280, 1, 1]> layers_2_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(167686720))), nonzero_data = tensor<fp16, [5678]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(167675264))))[name = string("layers_2_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified")];
-            tensor<fp16, [1, 1280, 1, 1]> var_773_cast_fp16 = conv(dilations = var_773_dilations_0, groups = var_773_groups_0, pad = var_773_pad_0, pad_type = var_773_pad_type_0, strides = var_773_strides_0, weight = layers_2_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_21_cast_fp16)[name = string("op_773_cast_fp16")];
-            tensor<fp16, [1, 1280, 1, 1]> obj_49_cast_fp16 = add(x = var_767_cast_fp16, y = var_773_cast_fp16)[name = string("obj_49_cast_fp16")];
-            tensor<fp16, [1, 1280, 1, 1]> inputs_15_cast_fp16 = add(x = inputs_13_cast_fp16, y = obj_49_cast_fp16)[name = string("inputs_15_cast_fp16")];
-            tensor<int32, [1]> out_15_axes_0 = const()[name = string("out_15_axes_0"), val = tensor<int32, [1]>([1])];
-            fp16 var_788_to_fp16 = const()[name = string("op_788_to_fp16"), val = fp16(0x1.5p-17)];
-            tensor<fp16, [1, 1280, 1, 1]> out_15_cast_fp16 = layer_norm(axes = out_15_axes_0, epsilon = var_788_to_fp16, x = inputs_15_cast_fp16)[name = string("out_15_cast_fp16")];
-            tensor<fp16, [1280]> obj_51_gamma_0_to_fp16 = const()[name = string("obj_51_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(167891584)))];
-            tensor<fp16, [1280]> obj_51_beta_0_to_fp16 = const()[name = string("obj_51_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(167894208)))];
-            fp16 obj_51_epsilon_0_to_fp16 = const()[name = string("obj_51_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)];
-            tensor<fp16, [1, 1280, 1, 1]> obj_51_cast_fp16 = batch_norm(beta = obj_51_beta_0_to_fp16, epsilon = obj_51_epsilon_0_to_fp16, gamma = obj_51_gamma_0_to_fp16, mean = obj_5_mean_0_to_fp16, variance = obj_5_variance_0_to_fp16, x = out_15_cast_fp16)[name = string("obj_51_cast_fp16")];
-            string var_808_pad_type_0 = const()[name = string("op_808_pad_type_0"), val = string("valid")];
-            tensor<int32, [2]> var_808_strides_0 = const()[name = string("op_808_strides_0"), val = tensor<int32, [2]>([1, 1])];
-            tensor<int32, [4]> var_808_pad_0 = const()[name = string("op_808_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
-            tensor<int32, [2]> var_808_dilations_0 = const()[name = string("op_808_dilations_0"), val = tensor<int32, [2]>([1, 1])];
-            int32 var_808_groups_0 = const()[name = string("op_808_groups_0"), val = int32(1)];
-            tensor<fp16, [1280, 1280, 1, 1]> layers_2_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(167896832))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(168716096))))[name = string("layers_2_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized")];
-            tensor<fp16, [1280]> layers_2_encoder_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_2_encoder_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(168716224)))];
-            tensor<fp16, [1, 1280, 1, 1]> var_808_cast_fp16 = conv(bias = layers_2_encoder_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_808_dilations_0, groups = var_808_groups_0, pad = var_808_pad_0, pad_type = var_808_pad_type_0, strides = var_808_strides_0, weight = layers_2_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_51_cast_fp16)[name = string("op_808_cast_fp16")];
-            string var_814_pad_type_0 = const()[name = string("op_814_pad_type_0"), val = string("valid")];
-            tensor<int32, [2]> var_814_strides_0 = const()[name = string("op_814_strides_0"), val = tensor<int32, [2]>([1, 1])];
-            tensor<int32, [4]> var_814_pad_0 = const()[name = string("op_814_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
-            tensor<int32, [2]> var_814_dilations_0 = const()[name = string("op_814_dilations_0"), val = tensor<int32, [2]>([1, 1])];
-            int32 var_814_groups_0 = const()[name = string("op_814_groups_0"), val = int32(1)];
-            tensor<fp16, [1280, 1280, 1, 1]> layers_2_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(168746560))), nonzero_data = tensor<fp16, [13824]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(168718848))))[name = string("layers_2_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified")];
-            tensor<fp16, [1, 1280, 1, 1]> var_814_cast_fp16 = conv(dilations = var_814_dilations_0, groups = var_814_groups_0, pad = var_814_pad_0, pad_type = var_814_pad_type_0, strides = var_814_strides_0, weight = layers_2_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_51_cast_fp16)[name = string("op_814_cast_fp16")];
-            tensor<fp16, [1, 1280, 1, 1]> query_11_cast_fp16 = add(x = var_808_cast_fp16, y = var_814_cast_fp16)[name = string("query_11_cast_fp16")];
-            tensor<int32, [4]> var_817 = const()[name = string("op_817"), val = tensor<int32, [4]>([1, 20, 64, -1])];
-            tensor<fp16, [1, 20, 64, 1]> mh_q_11_cast_fp16 = reshape(shape = var_817, x = query_11_cast_fp16)[name = string("mh_q_11_cast_fp16")];
-            fp16 var_819_to_fp16 = const()[name = string("op_819_to_fp16"), val = fp16(0x1p-3)];
-            tensor<fp16, [1, 20, 64, 1]> var_820_cast_fp16 = mul(x = mh_q_11_cast_fp16, y = var_819_to_fp16)[name = string("op_820_cast_fp16")];
-            tensor<int32, [4]> var_821 = const()[name = string("op_821"), val = tensor<int32, [4]>([1, 20, 64, -1])];
-            tensor<fp16, [1, 20, 64, 1536]> var_822_cast_fp16 = reshape(shape = var_821, x = obj_53_cast_fp16)[name = string("op_822_cast_fp16")];
-            bool mh_w_21_transpose_x_0 = const()[name = string("mh_w_21_transpose_x_0"), val = bool(true)];
-            bool mh_w_21_transpose_y_0 = const()[name = string("mh_w_21_transpose_y_0"), val = bool(false)];
-            tensor<fp16, [1, 20, 1, 1536]> mh_w_21_cast_fp16 = matmul(transpose_x = mh_w_21_transpose_x_0, transpose_y = mh_w_21_transpose_y_0, x = var_820_cast_fp16, y = var_822_cast_fp16)[name = string("mh_w_21_cast_fp16")];
-            tensor<fp16, [1, 20, 1, 1536]> mh_w_23_cast_fp16 = add(x = mh_w_21_cast_fp16, y = var_265_cast_fp16)[name = string("mh_w_23_cast_fp16")];
-            tensor<fp16, [1, 20, 1, 1536]> obj_59_cast_fp16 = softmax(axis = var_644, x = mh_w_23_cast_fp16)[name = string("obj_59_cast_fp16")];
-            tensor<int32, [4]> var_831 = const()[name = string("op_831"), val = tensor<int32, [4]>([1, 20, 64, -1])];
-            tensor<fp16, [1, 20, 64, 1536]> var_832_cast_fp16 = reshape(shape = var_831, x = obj_55_cast_fp16)[name = string("op_832_cast_fp16")];
-            bool attn_11_transpose_x_0 = const()[name = string("attn_11_transpose_x_0"), val = bool(false)];
-            bool attn_11_transpose_y_0 = const()[name = string("attn_11_transpose_y_0"), val = bool(true)];
-            tensor<fp16, [1, 20, 64, 1]> attn_11_cast_fp16 = matmul(transpose_x = attn_11_transpose_x_0, transpose_y = attn_11_transpose_y_0, x = var_832_cast_fp16, y = obj_59_cast_fp16)[name = string("attn_11_cast_fp16")];
-            tensor<int32, [4]> var_835 = const()[name = string("op_835"), val = tensor<int32, [4]>([1, 1280, 1, -1])];
-            tensor<fp16, [1, 1280, 1, 1]> input_23_cast_fp16 = reshape(shape = var_835, x = attn_11_cast_fp16)[name = string("input_23_cast_fp16")];
-            string var_845_pad_type_0 = const()[name = string("op_845_pad_type_0"), val = string("valid")];
-            tensor<int32, [2]> var_845_strides_0 = const()[name = string("op_845_strides_0"), val = tensor<int32, [2]>([1, 1])];
-            tensor<int32, [4]> var_845_pad_0 = const()[name = string("op_845_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
-            tensor<int32, [2]> var_845_dilations_0 = const()[name = string("op_845_dilations_0"), val = tensor<int32, [2]>([1, 1])];
-            int32 var_845_groups_0 = const()[name = string("op_845_groups_0"), val = int32(1)];
-            tensor<fp16, [1280, 1280, 1, 1]> layers_2_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(168951424))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(169770688))))[name = string("layers_2_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized")];
-            tensor<fp16, [1280]> layers_2_encoder_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_2_encoder_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(169770816)))];
-            tensor<fp16, [1, 1280, 1, 1]> var_845_cast_fp16 = conv(bias = layers_2_encoder_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_845_dilations_0, groups = var_845_groups_0, pad = var_845_pad_0, pad_type = var_845_pad_type_0, strides = var_845_strides_0, weight = layers_2_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_23_cast_fp16)[name = string("op_845_cast_fp16")];
-            string var_851_pad_type_0 = const()[name = string("op_851_pad_type_0"), val = string("valid")];
-            tensor<int32, [2]> var_851_strides_0 = const()[name = string("op_851_strides_0"), val = tensor<int32, [2]>([1, 1])];
-            tensor<int32, [4]> var_851_pad_0 = const()[name = string("op_851_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
-            tensor<int32, [2]> var_851_dilations_0 = const()[name = string("op_851_dilations_0"), val = tensor<int32, [2]>([1, 1])];
-            int32 var_851_groups_0 = const()[name = string("op_851_groups_0"), val = int32(1)];
-            tensor<fp16, [1280, 1280, 1, 1]> layers_2_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(169786432))), nonzero_data = tensor<fp16, [6438]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(169773440))))[name = string("layers_2_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified")];
-            tensor<fp16, [1, 1280, 1, 1]> var_851_cast_fp16 = conv(dilations = var_851_dilations_0, groups = var_851_groups_0, pad = var_851_pad_0, pad_type = var_851_pad_type_0, strides = var_851_strides_0, weight = layers_2_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_23_cast_fp16)[name = string("op_851_cast_fp16")];
-            tensor<fp16, [1, 1280, 1, 1]> obj_57_cast_fp16 = add(x = var_845_cast_fp16, y = var_851_cast_fp16)[name = string("obj_57_cast_fp16")];
-            tensor<fp16, [1, 1280, 1, 1]> inputs_17_cast_fp16 = add(x = inputs_15_cast_fp16, y = obj_57_cast_fp16)[name = string("inputs_17_cast_fp16")];
-            tensor<int32, [1]> out_17_axes_0 = const()[name = string("out_17_axes_0"), val = tensor<int32, [1]>([1])];
-            fp16 var_865_to_fp16 = const()[name = string("op_865_to_fp16"), val = fp16(0x1.5p-17)];
-            tensor<fp16, [1, 1280, 1, 1]> out_17_cast_fp16 = layer_norm(axes = out_17_axes_0, epsilon = var_865_to_fp16, x = inputs_17_cast_fp16)[name = string("out_17_cast_fp16")];
-            tensor<fp16, [1280]> input_25_gamma_0_to_fp16 = const()[name = string("input_25_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(169991296)))];
-            tensor<fp16, [1280]> input_25_beta_0_to_fp16 = const()[name = string("input_25_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(169993920)))];
-            fp16 input_25_epsilon_0_to_fp16 = const()[name = string("input_25_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)];
-            tensor<fp16, [1, 1280, 1, 1]> input_25_cast_fp16 = batch_norm(beta = input_25_beta_0_to_fp16, epsilon = input_25_epsilon_0_to_fp16, gamma = input_25_gamma_0_to_fp16, mean = obj_5_mean_0_to_fp16, variance = obj_5_variance_0_to_fp16, x = out_17_cast_fp16)[name = string("input_25_cast_fp16")];
-            string var_883_pad_type_0 = const()[name = string("op_883_pad_type_0"), val = string("valid")];
-            tensor<int32, [2]> var_883_strides_0 = const()[name = string("op_883_strides_0"), val = tensor<int32, [2]>([1, 1])];
-            tensor<int32, [4]> var_883_pad_0 = const()[name = string("op_883_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
-            tensor<int32, [2]> var_883_dilations_0 = const()[name = string("op_883_dilations_0"), val = tensor<int32, [2]>([1, 1])];
-            int32 var_883_groups_0 = const()[name = string("op_883_groups_0"), val = int32(1)];
-            tensor<fp16, [5120, 1280, 1, 1]> layers_2_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [5120, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(169996544))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(173273408))))[name = string("layers_2_fc1_inlier_module_weight_to_fp16_palettized")];
-            tensor<fp16, [5120]> layers_2_fc1_inlier_module_bias_to_fp16 = const()[name = string("layers_2_fc1_inlier_module_bias_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(173273536)))];
-            tensor<fp16, [1, 5120, 1, 1]> var_883_cast_fp16 = conv(bias = layers_2_fc1_inlier_module_bias_to_fp16, dilations = var_883_dilations_0, groups = var_883_groups_0, pad = var_883_pad_0, pad_type = var_883_pad_type_0, strides = var_883_strides_0, weight = layers_2_fc1_inlier_module_weight_to_fp16_palettized, x = input_25_cast_fp16)[name = string("op_883_cast_fp16")];
-            string var_889_pad_type_0 = const()[name = string("op_889_pad_type_0"), val = string("valid")];
-            tensor<int32, [2]> var_889_strides_0 = const()[name = string("op_889_strides_0"), val = tensor<int32, [2]>([1, 1])];
-            tensor<int32, [4]> var_889_pad_0 = const()[name = string("op_889_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
-            tensor<int32, [2]> var_889_dilations_0 = const()[name = string("op_889_dilations_0"), val = tensor<int32, [2]>([1, 1])];
-            int32 var_889_groups_0 = const()[name = string("op_889_groups_0"), val = int32(1)];
-            tensor<fp16, [5120, 1280, 1, 1]> layers_2_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [5120, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(173445760))), nonzero_data = tensor<fp16, [80920]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(173283840))))[name = string("layers_2_fc1_outlier_module_weight_to_fp16_sparsified")];
-            tensor<fp16, [1, 5120, 1, 1]> var_889_cast_fp16 = conv(dilations = var_889_dilations_0, groups = var_889_groups_0, pad = var_889_pad_0, pad_type = var_889_pad_type_0, strides = var_889_strides_0, weight = layers_2_fc1_outlier_module_weight_to_fp16_sparsified, x = input_25_cast_fp16)[name = string("op_889_cast_fp16")];
-            tensor<fp16, [1, 5120, 1, 1]> input_27_cast_fp16 = add(x = var_883_cast_fp16, y = var_889_cast_fp16)[name = string("input_27_cast_fp16")];
-            string input_29_mode_0 = const()[name = string("input_29_mode_0"), val = string("EXACT")];
-            tensor<fp16, [1, 5120, 1, 1]> input_29_cast_fp16 = gelu(mode = input_29_mode_0, x = input_27_cast_fp16)[name = string("input_29_cast_fp16")];
-            string var_900_pad_type_0 = const()[name = string("op_900_pad_type_0"), val = string("valid")];
-            tensor<int32, [2]> var_900_strides_0 = const()[name = string("op_900_strides_0"), val = tensor<int32, [2]>([1, 1])];
-            tensor<int32, [4]> var_900_pad_0 = const()[name = string("op_900_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
-            tensor<int32, [2]> var_900_dilations_0 = const()[name = string("op_900_dilations_0"), val = tensor<int32, [2]>([1, 1])];
-            int32 var_900_groups_0 = const()[name = string("op_900_groups_0"), val = int32(1)];
-            tensor<fp16, [1280, 5120, 1, 1]> layers_2_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1280, 5120, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(174265024))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(177541888))))[name = string("layers_2_fc2_inlier_module_weight_to_fp16_palettized")];
-            tensor<fp16, [1280]> layers_2_fc2_inlier_module_bias_to_fp16 = const()[name = string("layers_2_fc2_inlier_module_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(177542016)))];
-            tensor<fp16, [1, 1280, 1, 1]> var_900_cast_fp16 = conv(bias = layers_2_fc2_inlier_module_bias_to_fp16, dilations = var_900_dilations_0, groups = var_900_groups_0, pad = var_900_pad_0, pad_type = var_900_pad_type_0, strides = var_900_strides_0, weight = layers_2_fc2_inlier_module_weight_to_fp16_palettized, x = input_29_cast_fp16)[name = string("op_900_cast_fp16")];
-            string var_906_pad_type_0 = const()[name = string("op_906_pad_type_0"), val = string("valid")];
-            tensor<int32, [2]> var_906_strides_0 = const()[name = string("op_906_strides_0"), val = tensor<int32, [2]>([1, 1])];
-            tensor<int32, [4]> var_906_pad_0 = const()[name = string("op_906_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
-            tensor<int32, [2]> var_906_dilations_0 = const()[name = string("op_906_dilations_0"), val = tensor<int32, [2]>([1, 1])];
-            int32 var_906_groups_0 = const()[name = string("op_906_groups_0"), val = int32(1)];
-            tensor<fp16, [1280, 5120, 1, 1]> layers_2_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [1280, 5120, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(177624832))), nonzero_data = tensor<fp16, [40054]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(177544640))))[name = string("layers_2_fc2_outlier_module_weight_to_fp16_sparsified")];
-            tensor<fp16, [1, 1280, 1, 1]> var_906_cast_fp16 = conv(dilations = var_906_dilations_0, groups = var_906_groups_0, pad = var_906_pad_0, pad_type = var_906_pad_type_0, strides = var_906_strides_0, weight = layers_2_fc2_outlier_module_weight_to_fp16_sparsified, x = input_29_cast_fp16)[name = string("op_906_cast_fp16")];
-            tensor<fp16, [1, 1280, 1, 1]> hidden_states_7_cast_fp16 = add(x = var_900_cast_fp16, y = var_906_cast_fp16)[name = string("hidden_states_7_cast_fp16")];
-            tensor<fp16, [1, 1280, 1, 1]> inputs_19_cast_fp16 = add(x = inputs_17_cast_fp16, y = hidden_states_7_cast_fp16)[name = string("inputs_19_cast_fp16")];
-            tensor<int32, [4]> obj_71_begin_0 = const()[name = string("obj_71_begin_0"), val = tensor<int32, [4]>([3, 0, 0, 0])];
-            tensor<int32, [4]> obj_71_end_0 = const()[name = string("obj_71_end_0"), val = tensor<int32, [4]>([4, 1280, 1, 1536])];
-            tensor<bool, [4]> obj_71_end_mask_0 = const()[name = string("obj_71_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
-            tensor<fp16, [1, 1280, 1, 1536]> obj_71_cast_fp16 = slice_by_index(begin = obj_71_begin_0, end = obj_71_end_0, end_mask = obj_71_end_mask_0, x = read_state_2)[name = string("obj_71_cast_fp16")];
-            tensor<int32, [4]> obj_73_begin_0 = const()[name = string("obj_73_begin_0"), val = tensor<int32, [4]>([3, 0, 0, 0])];
-            tensor<int32, [4]> obj_73_end_0 = const()[name = string("obj_73_end_0"), val = tensor<int32, [4]>([4, 1280, 1, 1536])];
-            tensor<bool, [4]> obj_73_end_mask_0 = const()[name = string("obj_73_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
-            tensor<fp16, [1, 1280, 1, 1536]> obj_73_cast_fp16 = slice_by_index(begin = obj_73_begin_0, end = obj_73_end_0, end_mask = obj_73_end_mask_0, x = read_state_3)[name = string("obj_73_cast_fp16")];
-            int32 var_929 = const()[name = string("op_929"), val = int32(3)];
-            tensor<int32, [1]> out_19_axes_0 = const()[name = string("out_19_axes_0"), val = tensor<int32, [1]>([1])];
-            fp16 var_954_to_fp16 = const()[name = string("op_954_to_fp16"), val = fp16(0x1.5p-17)];
-            tensor<fp16, [1, 1280, 1, 1]> out_19_cast_fp16 = layer_norm(axes = out_19_axes_0, epsilon = var_954_to_fp16, x = inputs_19_cast_fp16)[name = string("out_19_cast_fp16")];
-            tensor<fp16, [1280]> obj_61_gamma_0_to_fp16 = const()[name = string("obj_61_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(178444096)))];
-            tensor<fp16, [1280]> obj_61_beta_0_to_fp16 = const()[name = string("obj_61_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(178446720)))];
-            fp16 obj_61_epsilon_0_to_fp16 = const()[name = string("obj_61_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)];
-            tensor<fp16, [1, 1280, 1, 1]> obj_61_cast_fp16 = batch_norm(beta = obj_61_beta_0_to_fp16, epsilon = obj_61_epsilon_0_to_fp16, gamma = obj_61_gamma_0_to_fp16, mean = obj_5_mean_0_to_fp16, variance = obj_5_variance_0_to_fp16, x = out_19_cast_fp16)[name = string("obj_61_cast_fp16")];
-            string var_976_pad_type_0 = const()[name = string("op_976_pad_type_0"), val = string("valid")];
-            tensor<int32, [2]> var_976_strides_0 = const()[name = string("op_976_strides_0"), val = tensor<int32, [2]>([1, 1])];
-            tensor<int32, [4]> var_976_pad_0 = const()[name = string("op_976_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
-            tensor<int32, [2]> var_976_dilations_0 = const()[name = string("op_976_dilations_0"), val = tensor<int32, [2]>([1, 1])];
-            int32 var_976_groups_0 = const()[name = string("op_976_groups_0"), val = int32(1)];
-            tensor<fp16, [1280, 1280, 1, 1]> layers_3_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(178449344))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(179268608))))[name = string("layers_3_self_attn_q_proj_inlier_module_weight_to_fp16_palettized")];
-            tensor<fp16, [1280]> layers_3_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_3_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(179268736)))];
-            tensor<fp16, [1, 1280, 1, 1]> var_976_cast_fp16 = conv(bias = layers_3_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_976_dilations_0, groups = var_976_groups_0, pad = var_976_pad_0, pad_type = var_976_pad_type_0, strides = var_976_strides_0, weight = layers_3_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_61_cast_fp16)[name = string("op_976_cast_fp16")];
-            string var_982_pad_type_0 = const()[name = string("op_982_pad_type_0"), val = string("valid")];
-            tensor<int32, [2]> var_982_strides_0 = const()[name = string("op_982_strides_0"), val = tensor<int32, [2]>([1, 1])];
-            tensor<int32, [4]> var_982_pad_0 = const()[name = string("op_982_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
-            tensor<int32, [2]> var_982_dilations_0 = const()[name = string("op_982_dilations_0"), val = tensor<int32, [2]>([1, 1])];
-            int32 var_982_groups_0 = const()[name = string("op_982_groups_0"), val = int32(1)];
-            tensor<fp16, [1280, 1280, 1, 1]> layers_3_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(179292800))), nonzero_data = tensor<fp16, [10664]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(179271360))))[name = string("layers_3_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified")];
-            tensor<fp16, [1, 1280, 1, 1]> var_982_cast_fp16 = conv(dilations = var_982_dilations_0, groups = var_982_groups_0, pad = var_982_pad_0, pad_type = var_982_pad_type_0, strides = var_982_strides_0, weight = layers_3_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_61_cast_fp16)[name = string("op_982_cast_fp16")];
-            tensor<fp16, [1, 1280, 1, 1]> query_13_cast_fp16 = add(x = var_976_cast_fp16, y = var_982_cast_fp16)[name = string("query_13_cast_fp16")];
-            string var_991_pad_type_0 = const()[name = string("op_991_pad_type_0"), val = string("valid")];
-            tensor<int32, [2]> var_991_strides_0 = const()[name = string("op_991_strides_0"), val = tensor<int32, [2]>([1, 1])];
-            tensor<int32, [4]> var_991_pad_0 = const()[name = string("op_991_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
-            tensor<int32, [2]> var_991_dilations_0 = const()[name = string("op_991_dilations_0"), val = tensor<int32, [2]>([1, 1])];
-            int32 var_991_groups_0 = const()[name = string("op_991_groups_0"), val = int32(1)];
-            tensor<fp16, [1280, 1280, 1, 1]> layers_3_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(179497664))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(180316928))))[name = string("layers_3_self_attn_k_proj_inlier_module_weight_to_fp16_palettized")];
-            tensor<fp16, [1, 1280, 1, 1]> var_991_cast_fp16 = conv(dilations = var_991_dilations_0, groups = var_991_groups_0, pad = var_991_pad_0, pad_type = var_991_pad_type_0, strides = var_991_strides_0, weight = layers_3_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_61_cast_fp16)[name = string("op_991_cast_fp16")];
-            string var_997_pad_type_0 = const()[name = string("op_997_pad_type_0"), val = string("valid")];
-            tensor<int32, [2]> var_997_strides_0 = const()[name = string("op_997_strides_0"), val = tensor<int32, [2]>([1, 1])];
-            tensor<int32, [4]> var_997_pad_0 = const()[name = string("op_997_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
-            tensor<int32, [2]> var_997_dilations_0 = const()[name = string("op_997_dilations_0"), val = tensor<int32, [2]>([1, 1])];
-            int32 var_997_groups_0 = const()[name = string("op_997_groups_0"), val = int32(1)];
-            tensor<fp16, [1280, 1280, 1, 1]> layers_3_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(180337920))), nonzero_data = tensor<fp16, [10387]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(180317056))))[name = string("layers_3_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified")];
-            tensor<fp16, [1, 1280, 1, 1]> var_997_cast_fp16 = conv(dilations = var_997_dilations_0, groups = var_997_groups_0, pad = var_997_pad_0, pad_type = var_997_pad_type_0, strides = var_997_strides_0, weight = layers_3_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_61_cast_fp16)[name = string("op_997_cast_fp16")];
-            tensor<fp16, [1, 1280, 1, 1]> current_key_cast_fp16 = add(x = var_991_cast_fp16, y = var_997_cast_fp16)[name = string("current_key_cast_fp16")];
-            string var_1007_pad_type_0 = const()[name = string("op_1007_pad_type_0"), val = string("valid")];
-            tensor<int32, [2]> var_1007_strides_0 = const()[name = string("op_1007_strides_0"), val = tensor<int32, [2]>([1, 1])];
-            tensor<int32, [4]> var_1007_pad_0 = const()[name = string("op_1007_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
-            tensor<int32, [2]> var_1007_dilations_0 = const()[name = string("op_1007_dilations_0"), val = tensor<int32, [2]>([1, 1])];
-            int32 var_1007_groups_0 = const()[name = string("op_1007_groups_0"), val = int32(1)];
-            tensor<fp16, [1280, 1280, 1, 1]> layers_3_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(180542784))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(181362048))))[name = string("layers_3_self_attn_v_proj_inlier_module_weight_to_fp16_palettized")];
-            tensor<fp16, [1280]> layers_3_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_3_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(181362176)))];
-            tensor<fp16, [1, 1280, 1, 1]> var_1007_cast_fp16 = conv(bias = layers_3_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_1007_dilations_0, groups = var_1007_groups_0, pad = var_1007_pad_0, pad_type = var_1007_pad_type_0, strides = var_1007_strides_0, weight = layers_3_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_61_cast_fp16)[name = string("op_1007_cast_fp16")];
-            string var_1013_pad_type_0 = const()[name = string("op_1013_pad_type_0"), val = string("valid")];
-            tensor<int32, [2]> var_1013_strides_0 = const()[name = string("op_1013_strides_0"), val = tensor<int32, [2]>([1, 1])];
-            tensor<int32, [4]> var_1013_pad_0 = const()[name = string("op_1013_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
-            tensor<int32, [2]> var_1013_dilations_0 = const()[name = string("op_1013_dilations_0"), val = tensor<int32, [2]>([1, 1])];
-            int32 var_1013_groups_0 = const()[name = string("op_1013_groups_0"), val = int32(1)];
-            tensor<fp16, [1280, 1280, 1, 1]> layers_3_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(181379584))), nonzero_data = tensor<fp16, [7342]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(181364800))))[name = string("layers_3_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified")];
-            tensor<fp16, [1, 1280, 1, 1]> var_1013_cast_fp16 = conv(dilations = var_1013_dilations_0, groups = var_1013_groups_0, pad = var_1013_pad_0, pad_type = var_1013_pad_type_0, strides = var_1013_strides_0, weight = layers_3_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_61_cast_fp16)[name = string("op_1013_cast_fp16")];
-            tensor<fp16, [1, 1280, 1, 1]> current_value_cast_fp16 = add(x = var_1007_cast_fp16, y = var_1013_cast_fp16)[name = string("current_value_cast_fp16")];
-            tensor<fp16, [1, 1280, 1, 448]> var_1019_cast_fp16 = mul(x = current_key_cast_fp16, y = var_170_cast_fp16)[name = string("op_1019_cast_fp16")];
-            tensor<fp16, [1, 1280, 1, 448]> key_cast_fp16 = add(x = var_55_cast_fp16_3, y = var_1019_cast_fp16)[name = string("key_cast_fp16")];
-            tensor<fp16, [1, 1280, 1, 448]> var_1021_cast_fp16 = mul(x = current_value_cast_fp16, y = var_170_cast_fp16)[name = string("op_1021_cast_fp16")];
-            tensor<fp16, [1, 1280, 1, 448]> value_cast_fp16 = add(x = var_62_cast_fp16_3, y = var_1021_cast_fp16)[name = string("value_cast_fp16")];
-            tensor<int32, [4]> var_1024 = const()[name = string("op_1024"), val = tensor<int32, [4]>([1, 20, 64, -1])];
-            tensor<fp16, [1, 20, 64, 1]> mh_q_13_cast_fp16 = reshape(shape = var_1024, x = query_13_cast_fp16)[name = string("mh_q_13_cast_fp16")];
-            fp16 var_1026_to_fp16 = const()[name = string("op_1026_to_fp16"), val = fp16(0x1p-3)];
-            tensor<fp16, [1, 20, 64, 1]> var_1027_cast_fp16 = mul(x = mh_q_13_cast_fp16, y = var_1026_to_fp16)[name = string("op_1027_cast_fp16")];
-            tensor<int32, [4]> var_1028 = const()[name = string("op_1028"), val = tensor<int32, [4]>([1, 20, 64, -1])];
-            tensor<fp16, [1, 20, 64, 448]> var_1029_cast_fp16 = reshape(shape = var_1028, x = key_cast_fp16)[name = string("op_1029_cast_fp16")];
-            bool mh_w_25_transpose_x_0 = const()[name = string("mh_w_25_transpose_x_0"), val = bool(true)];
-            bool mh_w_25_transpose_y_0 = const()[name = string("mh_w_25_transpose_y_0"), val = bool(false)];
-            tensor<fp16, [1, 20, 1, 448]> mh_w_25_cast_fp16 = matmul(transpose_x = mh_w_25_transpose_x_0, transpose_y = mh_w_25_transpose_y_0, x = var_1027_cast_fp16, y = var_1029_cast_fp16)[name = string("mh_w_25_cast_fp16")];
-            tensor<fp16, [1, 20, 1, 448]> mh_w_27_cast_fp16 = add(x = mh_w_25_cast_fp16, y = var_187_cast_fp16)[name = string("mh_w_27_cast_fp16")];
-            tensor<fp16, [1, 20, 1, 448]> var_1037_cast_fp16 = softmax(axis = var_929, x = mh_w_27_cast_fp16)[name = string("op_1037_cast_fp16")];
-            tensor<int32, [4]> var_1038 = const()[name = string("op_1038"), val = tensor<int32, [4]>([1, 20, 64, -1])];
-            tensor<fp16, [1, 20, 64, 448]> var_1039_cast_fp16 = reshape(shape = var_1038, x = value_cast_fp16)[name = string("op_1039_cast_fp16")];
-            bool attn_13_transpose_x_0 = const()[name = string("attn_13_transpose_x_0"), val = bool(false)];
-            bool attn_13_transpose_y_0 = const()[name = string("attn_13_transpose_y_0"), val = bool(true)];
-            tensor<fp16, [1, 20, 64, 1]> attn_13_cast_fp16 = matmul(transpose_x = attn_13_transpose_x_0, transpose_y = attn_13_transpose_y_0, x = var_1039_cast_fp16, y = var_1037_cast_fp16)[name = string("attn_13_cast_fp16")];
-            tensor<int32, [4]> var_1042 = const()[name = string("op_1042"), val = tensor<int32, [4]>([1, 1280, 1, -1])];
-            tensor<fp16, [1, 1280, 1, 1]> input_31_cast_fp16 = reshape(shape = var_1042, x = attn_13_cast_fp16)[name = string("input_31_cast_fp16")];
-            string var_1052_pad_type_0 = const()[name = string("op_1052_pad_type_0"), val = string("valid")];
-            tensor<int32, [2]> var_1052_strides_0 = const()[name = string("op_1052_strides_0"), val = tensor<int32, [2]>([1, 1])];
-            tensor<int32, [4]> var_1052_pad_0 = const()[name = string("op_1052_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
-            tensor<int32, [2]> var_1052_dilations_0 = const()[name = string("op_1052_dilations_0"), val = tensor<int32, [2]>([1, 1])];
-            int32 var_1052_groups_0 = const()[name = string("op_1052_groups_0"), val = int32(1)];
-            tensor<fp16, [1280, 1280, 1, 1]> layers_3_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(181584448))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(182403712))))[name = string("layers_3_self_attn_o_proj_inlier_module_weight_to_fp16_palettized")];
-            tensor<fp16, [1280]> layers_3_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_3_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(182403840)))];
-            tensor<fp16, [1, 1280, 1, 1]> var_1052_cast_fp16 = conv(bias = layers_3_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_1052_dilations_0, groups = var_1052_groups_0, pad = var_1052_pad_0, pad_type = var_1052_pad_type_0, strides = var_1052_strides_0, weight = layers_3_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_31_cast_fp16)[name = string("op_1052_cast_fp16")];
-            string var_1058_pad_type_0 = const()[name = string("op_1058_pad_type_0"), val = string("valid")];
-            tensor<int32, [2]> var_1058_strides_0 = const()[name = string("op_1058_strides_0"), val = tensor<int32, [2]>([1, 1])];
-            tensor<int32, [4]> var_1058_pad_0 = const()[name = string("op_1058_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
-            tensor<int32, [2]> var_1058_dilations_0 = const()[name = string("op_1058_dilations_0"), val = tensor<int32, [2]>([1, 1])];
-            int32 var_1058_groups_0 = const()[name = string("op_1058_groups_0"), val = int32(1)];
-            tensor<fp16, [1280, 1280, 1, 1]> layers_3_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(182420992))), nonzero_data = tensor<fp16, [7219]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(182406464))))[name = string("layers_3_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified")];
-            tensor<fp16, [1, 1280, 1, 1]> var_1058_cast_fp16 = conv(dilations = var_1058_dilations_0, groups = var_1058_groups_0, pad = var_1058_pad_0, pad_type = var_1058_pad_type_0, strides = var_1058_strides_0, weight = layers_3_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_31_cast_fp16)[name = string("op_1058_cast_fp16")];
-            tensor<fp16, [1, 1280, 1, 1]> obj_67_cast_fp16 = add(x = var_1052_cast_fp16, y = var_1058_cast_fp16)[name = string("obj_67_cast_fp16")];
-            tensor<fp16, [1, 1280, 1, 1]> inputs_21_cast_fp16 = add(x = inputs_19_cast_fp16, y = obj_67_cast_fp16)[name = string("inputs_21_cast_fp16")];
-            tensor<int32, [1]> out_21_axes_0 = const()[name = string("out_21_axes_0"), val = tensor<int32, [1]>([1])];
-            fp16 var_1073_to_fp16 = const()[name = string("op_1073_to_fp16"), val = fp16(0x1.5p-17)];
-            tensor<fp16, [1, 1280, 1, 1]> out_21_cast_fp16 = layer_norm(axes = out_21_axes_0, epsilon = var_1073_to_fp16, x = inputs_21_cast_fp16)[name = string("out_21_cast_fp16")];
-            tensor<fp16, [1280]> obj_69_gamma_0_to_fp16 = const()[name = string("obj_69_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(182625856)))];
-            tensor<fp16, [1280]> obj_69_beta_0_to_fp16 = const()[name = string("obj_69_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(182628480)))];
-            fp16 obj_69_epsilon_0_to_fp16 = const()[name = string("obj_69_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)];
-            tensor<fp16, [1, 1280, 1, 1]> obj_69_cast_fp16 = batch_norm(beta = obj_69_beta_0_to_fp16, epsilon = obj_69_epsilon_0_to_fp16, gamma = obj_69_gamma_0_to_fp16, mean = obj_5_mean_0_to_fp16, variance = obj_5_variance_0_to_fp16, x = out_21_cast_fp16)[name = string("obj_69_cast_fp16")];
-            string var_1093_pad_type_0 = const()[name = string("op_1093_pad_type_0"), val = string("valid")];
-            tensor<int32, [2]> var_1093_strides_0 = const()[name = string("op_1093_strides_0"), val = tensor<int32, [2]>([1, 1])];
-            tensor<int32, [4]> var_1093_pad_0 = const()[name = string("op_1093_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
-            tensor<int32, [2]> var_1093_dilations_0 = const()[name = string("op_1093_dilations_0"), val = tensor<int32, [2]>([1, 1])];
-            int32 var_1093_groups_0 = const()[name = string("op_1093_groups_0"), val = int32(1)];
-            tensor<fp16, [1280, 1280, 1, 1]> layers_3_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(182631104))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(183450368))))[name = string("layers_3_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized")];
-            tensor<fp16, [1280]> layers_3_encoder_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_3_encoder_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(183450496)))];
-            tensor<fp16, [1, 1280, 1, 1]> var_1093_cast_fp16 = conv(bias = layers_3_encoder_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_1093_dilations_0, groups = var_1093_groups_0, pad = var_1093_pad_0, pad_type = var_1093_pad_type_0, strides = var_1093_strides_0, weight = layers_3_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_69_cast_fp16)[name = string("op_1093_cast_fp16")];
-            string var_1099_pad_type_0 = const()[name = string("op_1099_pad_type_0"), val = string("valid")];
-            tensor<int32, [2]> var_1099_strides_0 = const()[name = string("op_1099_strides_0"), val = tensor<int32, [2]>([1, 1])];
-            tensor<int32, [4]> var_1099_pad_0 = const()[name = string("op_1099_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
-            tensor<int32, [2]> var_1099_dilations_0 = const()[name = string("op_1099_dilations_0"), val = tensor<int32, [2]>([1, 1])];
-            int32 var_1099_groups_0 = const()[name = string("op_1099_groups_0"), val = int32(1)];
-            tensor<fp16, [1280, 1280, 1, 1]> layers_3_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(183468544))), nonzero_data = tensor<fp16, [7675]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(183453120))))[name = string("layers_3_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified")];
-            tensor<fp16, [1, 1280, 1, 1]> var_1099_cast_fp16 = conv(dilations = var_1099_dilations_0, groups = var_1099_groups_0, pad = var_1099_pad_0, pad_type = var_1099_pad_type_0, strides = var_1099_strides_0, weight = layers_3_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_69_cast_fp16)[name = string("op_1099_cast_fp16")];
-            tensor<fp16, [1, 1280, 1, 1]> query_cast_fp16 = add(x = var_1093_cast_fp16, y = var_1099_cast_fp16)[name = string("query_cast_fp16")];
-            tensor<int32, [4]> var_1102 = const()[name = string("op_1102"), val = tensor<int32, [4]>([1, 20, 64, -1])];
-            tensor<fp16, [1, 20, 64, 1]> mh_q_cast_fp16 = reshape(shape = var_1102, x = query_cast_fp16)[name = string("mh_q_cast_fp16")];
-            fp16 var_1104_to_fp16 = const()[name = string("op_1104_to_fp16"), val = fp16(0x1p-3)];
-            tensor<fp16, [1, 20, 64, 1]> var_1105_cast_fp16 = mul(x = mh_q_cast_fp16, y = var_1104_to_fp16)[name = string("op_1105_cast_fp16")];
-            tensor<int32, [4]> var_1106 = const()[name = string("op_1106"), val = tensor<int32, [4]>([1, 20, 64, -1])];
-            tensor<fp16, [1, 20, 64, 1536]> var_1107_cast_fp16 = reshape(shape = var_1106, x = obj_71_cast_fp16)[name = string("op_1107_cast_fp16")];
-            bool mh_w_29_transpose_x_0 = const()[name = string("mh_w_29_transpose_x_0"), val = bool(true)];
-            bool mh_w_29_transpose_y_0 = const()[name = string("mh_w_29_transpose_y_0"), val = bool(false)];
-            tensor<fp16, [1, 20, 1, 1536]> mh_w_29_cast_fp16 = matmul(transpose_x = mh_w_29_transpose_x_0, transpose_y = mh_w_29_transpose_y_0, x = var_1105_cast_fp16, y = var_1107_cast_fp16)[name = string("mh_w_29_cast_fp16")];
-            tensor<fp16, [1, 20, 1, 1536]> mh_w_cast_fp16 = add(x = mh_w_29_cast_fp16, y = var_265_cast_fp16)[name = string("mh_w_cast_fp16")];
-            tensor<fp16, [1, 20, 1, 1536]> obj_77_cast_fp16 = softmax(axis = var_929, x = mh_w_cast_fp16)[name = string("obj_77_cast_fp16")];
-            tensor<int32, [4]> var_1116 = const()[name = string("op_1116"), val = tensor<int32, [4]>([1, 20, 64, -1])];
-            tensor<fp16, [1, 20, 64, 1536]> var_1117_cast_fp16 = reshape(shape = var_1116, x = obj_73_cast_fp16)[name = string("op_1117_cast_fp16")];
-            bool attn_transpose_x_0 = const()[name = string("attn_transpose_x_0"), val = bool(false)];
-            bool attn_transpose_y_0 = const()[name = string("attn_transpose_y_0"), val = bool(true)];
-            tensor<fp16, [1, 20, 64, 1]> attn_cast_fp16 = matmul(transpose_x = attn_transpose_x_0, transpose_y = attn_transpose_y_0, x = var_1117_cast_fp16, y = obj_77_cast_fp16)[name = string("attn_cast_fp16")];
-            tensor<int32, [4]> var_1120 = const()[name = string("op_1120"), val = tensor<int32, [4]>([1, 1280, 1, -1])];
-            tensor<fp16, [1, 1280, 1, 1]> input_33_cast_fp16 = reshape(shape = var_1120, x = attn_cast_fp16)[name = string("input_33_cast_fp16")];
-            string var_1130_pad_type_0 = const()[name = string("op_1130_pad_type_0"), val = string("valid")];
-            tensor<int32, [2]> var_1130_strides_0 = const()[name = string("op_1130_strides_0"), val = tensor<int32, [2]>([1, 1])];
-            tensor<int32, [4]> var_1130_pad_0 = const()[name = string("op_1130_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
-            tensor<int32, [2]> var_1130_dilations_0 = const()[name = string("op_1130_dilations_0"), val = tensor<int32, [2]>([1, 1])];
-            int32 var_1130_groups_0 = const()[name = string("op_1130_groups_0"), val = int32(1)];
-            tensor<fp16, [1280, 1280, 1, 1]> layers_3_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(183673408))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(184492672))))[name = string("layers_3_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized")];
-            tensor<fp16, [1280]> layers_3_encoder_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_3_encoder_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(184492800)))];
-            tensor<fp16, [1, 1280, 1, 1]> var_1130_cast_fp16 = conv(bias = layers_3_encoder_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_1130_dilations_0, groups = var_1130_groups_0, pad = var_1130_pad_0, pad_type = var_1130_pad_type_0, strides = var_1130_strides_0, weight = layers_3_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_33_cast_fp16)[name = string("op_1130_cast_fp16")];
-            string var_1136_pad_type_0 = const()[name = string("op_1136_pad_type_0"), val = string("valid")];
-            tensor<int32, [2]> var_1136_strides_0 = const()[name = string("op_1136_strides_0"), val = tensor<int32, [2]>([1, 1])];
-            tensor<int32, [4]> var_1136_pad_0 = const()[name = string("op_1136_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
-            tensor<int32, [2]> var_1136_dilations_0 = const()[name = string("op_1136_dilations_0"), val = tensor<int32, [2]>([1, 1])];
-            int32 var_1136_groups_0 = const()[name = string("op_1136_groups_0"), val = int32(1)];
-            tensor<fp16, [1280, 1280, 1, 1]> layers_3_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(184507136))), nonzero_data = tensor<fp16, [5809]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(184495424))))[name = string("layers_3_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified")];
-            tensor<fp16, [1, 1280, 1, 1]> var_1136_cast_fp16 = conv(dilations = var_1136_dilations_0, groups = var_1136_groups_0, pad = var_1136_pad_0, pad_type = var_1136_pad_type_0, strides = var_1136_strides_0, weight = layers_3_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_33_cast_fp16)[name = string("op_1136_cast_fp16")];
-            tensor<fp16, [1, 1280, 1, 1]> obj_75_cast_fp16 = add(x = var_1130_cast_fp16, y = var_1136_cast_fp16)[name = string("obj_75_cast_fp16")];
-            tensor<fp16, [1, 1280, 1, 1]> inputs_23_cast_fp16 = add(x = inputs_21_cast_fp16, y = obj_75_cast_fp16)[name = string("inputs_23_cast_fp16")];
-            tensor<int32, [1]> out_23_axes_0 = const()[name = string("out_23_axes_0"), val = tensor<int32, [1]>([1])];
-            fp16 var_1150_to_fp16 = const()[name = string("op_1150_to_fp16"), val = fp16(0x1.5p-17)];
-            tensor<fp16, [1, 1280, 1, 1]> out_23_cast_fp16 = layer_norm(axes = out_23_axes_0, epsilon = var_1150_to_fp16, x = inputs_23_cast_fp16)[name = string("out_23_cast_fp16")];
-            tensor<fp16, [1280]> input_35_gamma_0_to_fp16 = const()[name = string("input_35_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(184712000)))];
-            tensor<fp16, [1280]> input_35_beta_0_to_fp16 = const()[name = string("input_35_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(184714624)))];
-            fp16 input_35_epsilon_0_to_fp16 = const()[name = string("input_35_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)];
-            tensor<fp16, [1, 1280, 1, 1]> input_35_cast_fp16 = batch_norm(beta = input_35_beta_0_to_fp16, epsilon = input_35_epsilon_0_to_fp16, gamma = input_35_gamma_0_to_fp16, mean = obj_5_mean_0_to_fp16, variance = obj_5_variance_0_to_fp16, x = out_23_cast_fp16)[name = string("input_35_cast_fp16")];
-            string var_1168_pad_type_0 = const()[name = string("op_1168_pad_type_0"), val = string("valid")];
-            tensor<int32, [2]> var_1168_strides_0 = const()[name = string("op_1168_strides_0"), val = tensor<int32, [2]>([1, 1])];
-            tensor<int32, [4]> var_1168_pad_0 = const()[name = string("op_1168_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
-            tensor<int32, [2]> var_1168_dilations_0 = const()[name = string("op_1168_dilations_0"), val = tensor<int32, [2]>([1, 1])];
-            int32 var_1168_groups_0 = const()[name = string("op_1168_groups_0"), val = int32(1)];
-            tensor<fp16, [5120, 1280, 1, 1]> layers_3_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [5120, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(184717248))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(187994112))))[name = string("layers_3_fc1_inlier_module_weight_to_fp16_palettized")];
-            tensor<fp16, [5120]> layers_3_fc1_inlier_module_bias_to_fp16 = const()[name = string("layers_3_fc1_inlier_module_bias_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(187994240)))];
-            tensor<fp16, [1, 5120, 1, 1]> var_1168_cast_fp16 = conv(bias = layers_3_fc1_inlier_module_bias_to_fp16, dilations = var_1168_dilations_0, groups = var_1168_groups_0, pad = var_1168_pad_0, pad_type = var_1168_pad_type_0, strides = var_1168_strides_0, weight = layers_3_fc1_inlier_module_weight_to_fp16_palettized, x = input_35_cast_fp16)[name = string("op_1168_cast_fp16")];
-            string var_1174_pad_type_0 = const()[name = string("op_1174_pad_type_0"), val = string("valid")];
-            tensor<int32, [2]> var_1174_strides_0 = const()[name = string("op_1174_strides_0"), val = tensor<int32, [2]>([1, 1])];
-            tensor<int32, [4]> var_1174_pad_0 = const()[name = string("op_1174_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
-            tensor<int32, [2]> var_1174_dilations_0 = const()[name = string("op_1174_dilations_0"), val = tensor<int32, [2]>([1, 1])];
-            int32 var_1174_groups_0 = const()[name = string("op_1174_groups_0"), val = int32(1)];
-            tensor<fp16, [5120, 1280, 1, 1]> layers_3_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [5120, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(188057280))), nonzero_data = tensor<fp16, [26331]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(188004544))))[name = string("layers_3_fc1_outlier_module_weight_to_fp16_sparsified")];
-            tensor<fp16, [1, 5120, 1, 1]> var_1174_cast_fp16 = conv(dilations = var_1174_dilations_0, groups = var_1174_groups_0, pad = var_1174_pad_0, pad_type = var_1174_pad_type_0, strides = var_1174_strides_0, weight = layers_3_fc1_outlier_module_weight_to_fp16_sparsified, x = input_35_cast_fp16)[name = string("op_1174_cast_fp16")];
-            tensor<fp16, [1, 5120, 1, 1]> input_37_cast_fp16 = add(x = var_1168_cast_fp16, y = var_1174_cast_fp16)[name = string("input_37_cast_fp16")];
-            string input_mode_0 = const()[name = string("input_mode_0"), val = string("EXACT")];
-            tensor<fp16, [1, 5120, 1, 1]> input_cast_fp16 = gelu(mode = input_mode_0, x = input_37_cast_fp16)[name = string("input_cast_fp16")];
-            string var_1185_pad_type_0 = const()[name = string("op_1185_pad_type_0"), val = string("valid")];
-            tensor<int32, [2]> var_1185_strides_0 = const()[name = string("op_1185_strides_0"), val = tensor<int32, [2]>([1, 1])];
-            tensor<int32, [4]> var_1185_pad_0 = const()[name = string("op_1185_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
-            tensor<int32, [2]> var_1185_dilations_0 = const()[name = string("op_1185_dilations_0"), val = tensor<int32, [2]>([1, 1])];
-            int32 var_1185_groups_0 = const()[name = string("op_1185_groups_0"), val = int32(1)];
-            tensor<fp16, [1280, 5120, 1, 1]> layers_3_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1280, 5120, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(188876544))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(192153408))))[name = string("layers_3_fc2_inlier_module_weight_to_fp16_palettized")];
-            tensor<fp16, [1280]> layers_3_fc2_inlier_module_bias_to_fp16 = const()[name = string("layers_3_fc2_inlier_module_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(192153536)))];
-            tensor<fp16, [1, 1280, 1, 1]> var_1185_cast_fp16 = conv(bias = layers_3_fc2_inlier_module_bias_to_fp16, dilations = var_1185_dilations_0, groups = var_1185_groups_0, pad = var_1185_pad_0, pad_type = var_1185_pad_type_0, strides = var_1185_strides_0, weight = layers_3_fc2_inlier_module_weight_to_fp16_palettized, x = input_cast_fp16)[name = string("op_1185_cast_fp16")];
-            string var_1191_pad_type_0 = const()[name = string("op_1191_pad_type_0"), val = string("valid")];
-            tensor<int32, [2]> var_1191_strides_0 = const()[name = string("op_1191_strides_0"), val = tensor<int32, [2]>([1, 1])];
-            tensor<int32, [4]> var_1191_pad_0 = const()[name = string("op_1191_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
-            tensor<int32, [2]> var_1191_dilations_0 = const()[name = string("op_1191_dilations_0"), val = tensor<int32, [2]>([1, 1])];
-            int32 var_1191_groups_0 = const()[name = string("op_1191_groups_0"), val = int32(1)];
-            tensor<fp16, [1280, 5120, 1, 1]> layers_3_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [1280, 5120, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(192226688))), nonzero_data = tensor<fp16, [35232]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(192156160))))[name = string("layers_3_fc2_outlier_module_weight_to_fp16_sparsified")];
-            tensor<fp16, [1, 1280, 1, 1]> var_1191_cast_fp16 = conv(dilations = var_1191_dilations_0, groups = var_1191_groups_0, pad = var_1191_pad_0, pad_type = var_1191_pad_type_0, strides = var_1191_strides_0, weight = layers_3_fc2_outlier_module_weight_to_fp16_sparsified, x = input_cast_fp16)[name = string("op_1191_cast_fp16")];
-            tensor<fp16, [1, 1280, 1, 1]> hidden_states_9_cast_fp16 = add(x = var_1185_cast_fp16, y = var_1191_cast_fp16)[name = string("hidden_states_9_cast_fp16")];
-            tensor<fp16, [1, 1280, 1, 1]> inputs_cast_fp16 = add(x = inputs_23_cast_fp16, y = hidden_states_9_cast_fp16)[name = string("inputs_cast_fp16")];
-            tensor<int32, [1]> out_axes_0 = const()[name = string("out_axes_0"), val = tensor<int32, [1]>([1])];
-            fp16 var_1211_to_fp16 = const()[name = string("op_1211_to_fp16"), val = fp16(0x1.5p-17)];
-            tensor<fp16, [1, 1280, 1, 1]> out_cast_fp16 = layer_norm(axes = out_axes_0, epsilon = var_1211_to_fp16, x = inputs_cast_fp16)[name = string("out_cast_fp16")];
-            tensor<fp16, [1280]> hidden_states_gamma_0_to_fp16 = const()[name = string("hidden_states_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(193045952)))];
-            tensor<fp16, [1280]> hidden_states_beta_0_to_fp16 = const()[name = string("hidden_states_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(193048576)))];
-            fp16 hidden_states_epsilon_0_to_fp16 = const()[name = string("hidden_states_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)];
-            tensor<fp16, [1, 1280, 1, 1]> hidden_states_cast_fp16 = batch_norm(beta = hidden_states_beta_0_to_fp16, epsilon = hidden_states_epsilon_0_to_fp16, gamma = hidden_states_gamma_0_to_fp16, mean = obj_5_mean_0_to_fp16, variance = obj_5_variance_0_to_fp16, x = out_cast_fp16)[name = string("hidden_states_cast_fp16")];
-            tensor<int32, [1]> var_1222_axes_0 = const()[name = string("op_1222_axes_0"), val = tensor<int32, [1]>([2])];
-            tensor<fp16, [1, 1280, 1]> var_1222_cast_fp16 = squeeze(axes = var_1222_axes_0, x = hidden_states_cast_fp16)[name = string("op_1222_cast_fp16")];
-            tensor<int32, [3]> var_1225_perm_0 = const()[name = string("op_1225_perm_0"), val = tensor<int32, [3]>([0, 2, 1])];
-            tensor<fp16, [51866]> linear_0_bias_0_to_fp16 = const()[name = string("linear_0_bias_0_to_fp16"), val = tensor<fp16, [51866]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(193051200)))];
-            tensor<fp16, [1, 1, 1280]> var_1225_cast_fp16 = transpose(perm = var_1225_perm_0, x = var_1222_cast_fp16)[name = string("transpose_0")];
-            tensor<fp16, [1, 1, 51866]> logits = linear(bias = linear_0_bias_0_to_fp16, weight = embed_tokens_weight_to_fp16, x = var_1225_cast_fp16)[name = string("linear_0_cast_fp16")];
-            int32 var_1229 = const()[name = string("op_1229"), val = int32(1)];
-            bool obj_81_interleave_0 = const()[name = string("obj_81_interleave_0"), val = bool(false)];
-            tensor<fp16, [1, 5120, 1, 1]> key_cache_updates = concat(axis = var_1229, interleave = obj_81_interleave_0, values = (current_key_1_cast_fp16, current_key_3_cast_fp16, current_key_5_cast_fp16, current_key_cast_fp16))[name = string("obj_81_cast_fp16")];
-            int32 var_1232 = const()[name = string("op_1232"), val = int32(1)];
-            bool obj_83_interleave_0 = const()[name = string("obj_83_interleave_0"), val = bool(false)];
-            tensor<fp16, [1, 5120, 1, 1]> value_cache_updates = concat(axis = var_1232, interleave = obj_83_interleave_0, values = (current_value_1_cast_fp16, current_value_3_cast_fp16, current_value_5_cast_fp16, current_value_cast_fp16))[name = string("obj_83_cast_fp16")];
-            tensor<int32, [4]> var_1243_begin_0 = const()[name = string("op_1243_begin_0"), val = tensor<int32, [4]>([0, 4, 0, 0])];
-            tensor<int32, [4]> var_1243_end_0 = const()[name = string("op_1243_end_0"), val = tensor<int32, [4]>([1, 5, 1, 1536])];
-            tensor<bool, [4]> var_1243_end_mask_0 = const()[name = string("op_1243_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
-            tensor<fp16, [1, 1, 1, 1536]> var_1243_cast_fp16 = slice_by_index(begin = var_1243_begin_0, end = var_1243_end_0, end_mask = var_1243_end_mask_0, x = obj_59_cast_fp16)[name = string("op_1243_cast_fp16")];
-            tensor<int32, [4]> var_1246_begin_0 = const()[name = string("op_1246_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
-            tensor<int32, [4]> var_1246_end_0 = const()[name = string("op_1246_end_0"), val = tensor<int32, [4]>([1, 1, 1, 1536])];
-            tensor<bool, [4]> var_1246_end_mask_0 = const()[name = string("op_1246_end_mask_0"), val = tensor<bool, [4]>([true, true, false, true])];
-            tensor<bool, [4]> var_1246_squeeze_mask_0 = const()[name = string("op_1246_squeeze_mask_0"), val = tensor<bool, [4]>([false, false, true, false])];
-            tensor<fp16, [1, 1, 1536]> var_1246_cast_fp16 = slice_by_index(begin = var_1246_begin_0, end = var_1246_end_0, end_mask = var_1246_end_mask_0, squeeze_mask = var_1246_squeeze_mask_0, x = var_1243_cast_fp16)[name = string("op_1246_cast_fp16")];
-            tensor<int32, [4]> var_1261_begin_0 = const()[name = string("op_1261_begin_0"), val = tensor<int32, [4]>([0, 11, 0, 0])];
-            tensor<int32, [4]> var_1261_end_0 = const()[name = string("op_1261_end_0"), val = tensor<int32, [4]>([1, 12, 1, 1536])];
-            tensor<bool, [4]> var_1261_end_mask_0 = const()[name = string("op_1261_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
-            tensor<fp16, [1, 1, 1, 1536]> var_1261_cast_fp16 = slice_by_index(begin = var_1261_begin_0, end = var_1261_end_0, end_mask = var_1261_end_mask_0, x = obj_59_cast_fp16)[name = string("op_1261_cast_fp16")];
-            tensor<int32, [4]> var_1264_begin_0 = const()[name = string("op_1264_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
-            tensor<int32, [4]> var_1264_end_0 = const()[name = string("op_1264_end_0"), val = tensor<int32, [4]>([1, 1, 1, 1536])];
-            tensor<bool, [4]> var_1264_end_mask_0 = const()[name = string("op_1264_end_mask_0"), val = tensor<bool, [4]>([true, true, false, true])];
-            tensor<bool, [4]> var_1264_squeeze_mask_0 = const()[name = string("op_1264_squeeze_mask_0"), val = tensor<bool, [4]>([false, false, true, false])];
-            tensor<fp16, [1, 1, 1536]> var_1264_cast_fp16 = slice_by_index(begin = var_1264_begin_0, end = var_1264_end_0, end_mask = var_1264_end_mask_0, squeeze_mask = var_1264_squeeze_mask_0, x = var_1261_cast_fp16)[name = string("op_1264_cast_fp16")];
-            tensor<int32, [4]> var_1279_begin_0 = const()[name = string("op_1279_begin_0"), val = tensor<int32, [4]>([0, 3, 0, 0])];
-            tensor<int32, [4]> var_1279_end_0 = const()[name = string("op_1279_end_0"), val = tensor<int32, [4]>([1, 4, 1, 1536])];
-            tensor<bool, [4]> var_1279_end_mask_0 = const()[name = string("op_1279_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
-            tensor<fp16, [1, 1, 1, 1536]> var_1279_cast_fp16 = slice_by_index(begin = var_1279_begin_0, end = var_1279_end_0, end_mask = var_1279_end_mask_0, x = obj_77_cast_fp16)[name = string("op_1279_cast_fp16")];
-            tensor<int32, [4]> var_1282_begin_0 = const()[name = string("op_1282_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
-            tensor<int32, [4]> var_1282_end_0 = const()[name = string("op_1282_end_0"), val = tensor<int32, [4]>([1, 1, 1, 1536])];
-            tensor<bool, [4]> var_1282_end_mask_0 = const()[name = string("op_1282_end_mask_0"), val = tensor<bool, [4]>([true, true, false, true])];
-            tensor<bool, [4]> var_1282_squeeze_mask_0 = const()[name = string("op_1282_squeeze_mask_0"), val = tensor<bool, [4]>([false, false, true, false])];
-            tensor<fp16, [1, 1, 1536]> var_1282_cast_fp16 = slice_by_index(begin = var_1282_begin_0, end = var_1282_end_0, end_mask = var_1282_end_mask_0, squeeze_mask = var_1282_squeeze_mask_0, x = var_1279_cast_fp16)[name = string("op_1282_cast_fp16")];
-            tensor<int32, [4]> var_1297_begin_0 = const()[name = string("op_1297_begin_0"), val = tensor<int32, [4]>([0, 6, 0, 0])];
-            tensor<int32, [4]> var_1297_end_0 = const()[name = string("op_1297_end_0"), val = tensor<int32, [4]>([1, 7, 1, 1536])];
-            tensor<bool, [4]> var_1297_end_mask_0 = const()[name = string("op_1297_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
-            tensor<fp16, [1, 1, 1, 1536]> var_1297_cast_fp16 = slice_by_index(begin = var_1297_begin_0, end = var_1297_end_0, end_mask = var_1297_end_mask_0, x = obj_77_cast_fp16)[name = string("op_1297_cast_fp16")];
-            tensor<int32, [4]> var_1300_begin_0 = const()[name = string("op_1300_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
-            tensor<int32, [4]> var_1300_end_0 = const()[name = string("op_1300_end_0"), val = tensor<int32, [4]>([1, 1, 1, 1536])];
-            tensor<bool, [4]> var_1300_end_mask_0 = const()[name = string("op_1300_end_mask_0"), val = tensor<bool, [4]>([true, true, false, true])];
-            tensor<bool, [4]> var_1300_squeeze_mask_0 = const()[name = string("op_1300_squeeze_mask_0"), val = tensor<bool, [4]>([false, false, true, false])];
-            tensor<fp16, [1, 1, 1536]> var_1300_cast_fp16 = slice_by_index(begin = var_1300_begin_0, end = var_1300_end_0, end_mask = var_1300_end_mask_0, squeeze_mask = var_1300_squeeze_mask_0, x = var_1297_cast_fp16)[name = string("op_1300_cast_fp16")];
-            tensor<int32, [4]> var_1315_begin_0 = const()[name = string("op_1315_begin_0"), val = tensor<int32, [4]>([0, 11, 0, 0])];
-            tensor<int32, [4]> var_1315_end_0 = const()[name = string("op_1315_end_0"), val = tensor<int32, [4]>([1, 12, 1, 1536])];
-            tensor<bool, [4]> var_1315_end_mask_0 = const()[name = string("op_1315_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
-            tensor<fp16, [1, 1, 1, 1536]> var_1315_cast_fp16 = slice_by_index(begin = var_1315_begin_0, end = var_1315_end_0, end_mask = var_1315_end_mask_0, x = obj_77_cast_fp16)[name = string("op_1315_cast_fp16")];
-            tensor<int32, [4]> var_1318_begin_0 = const()[name = string("op_1318_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
-            tensor<int32, [4]> var_1318_end_0 = const()[name = string("op_1318_end_0"), val = tensor<int32, [4]>([1, 1, 1, 1536])];
-            tensor<bool, [4]> var_1318_end_mask_0 = const()[name = string("op_1318_end_mask_0"), val = tensor<bool, [4]>([true, true, false, true])];
-            tensor<bool, [4]> var_1318_squeeze_mask_0 = const()[name = string("op_1318_squeeze_mask_0"), val = tensor<bool, [4]>([false, false, true, false])];
-            tensor<fp16, [1, 1, 1536]> var_1318_cast_fp16 = slice_by_index(begin = var_1318_begin_0, end = var_1318_end_0, end_mask = var_1318_end_mask_0, squeeze_mask = var_1318_squeeze_mask_0, x = var_1315_cast_fp16)[name = string("op_1318_cast_fp16")];
-            tensor<int32, [4]> var_1333_begin_0 = const()[name = string("op_1333_begin_0"), val = tensor<int32, [4]>([0, 14, 0, 0])];
-            tensor<int32, [4]> var_1333_end_0 = const()[name = string("op_1333_end_0"), val = tensor<int32, [4]>([1, 15, 1, 1536])];
-            tensor<bool, [4]> var_1333_end_mask_0 = const()[name = string("op_1333_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
-            tensor<fp16, [1, 1, 1, 1536]> var_1333_cast_fp16 = slice_by_index(begin = var_1333_begin_0, end = var_1333_end_0, end_mask = var_1333_end_mask_0, x = obj_77_cast_fp16)[name = string("op_1333_cast_fp16")];
-            tensor<int32, [4]> var_1336_begin_0 = const()[name = string("op_1336_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
-            tensor<int32, [4]> var_1336_end_0 = const()[name = string("op_1336_end_0"), val = tensor<int32, [4]>([1, 1, 1, 1536])];
-            tensor<bool, [4]> var_1336_end_mask_0 = const()[name = string("op_1336_end_mask_0"), val = tensor<bool, [4]>([true, true, false, true])];
-            tensor<bool, [4]> var_1336_squeeze_mask_0 = const()[name = string("op_1336_squeeze_mask_0"), val = tensor<bool, [4]>([false, false, true, false])];
-            tensor<fp16, [1, 1, 1536]> var_1336_cast_fp16 = slice_by_index(begin = var_1336_begin_0, end = var_1336_end_0, end_mask = var_1336_end_mask_0, squeeze_mask = var_1336_squeeze_mask_0, x = var_1333_cast_fp16)[name = string("op_1336_cast_fp16")];
-            int32 var_1343 = const()[name = string("op_1343"), val = int32(1)];
-            bool var_1344_interleave_0 = const()[name = string("op_1344_interleave_0"), val = bool(false)];
-            tensor<fp16, [1, 6, 1536]> var_1344_cast_fp16 = concat(axis = var_1343, interleave = var_1344_interleave_0, values = (var_1246_cast_fp16, var_1264_cast_fp16, var_1282_cast_fp16, var_1300_cast_fp16, var_1318_cast_fp16, var_1336_cast_fp16))[name = string("op_1344_cast_fp16")];
-            bool var_1347 = const()[name = string("op_1347"), val = bool(false)];
-            tensor<int32, [1]> obj_axes_0 = const()[name = string("obj_axes_0"), val = tensor<int32, [1]>([1])];
-            tensor<fp16, [1, 1536]> alignment_heads_weights = reduce_mean(axes = obj_axes_0, keep_dims = var_1347, x = var_1344_cast_fp16)[name = string("obj_cast_fp16")];
-        } -> (logits, key_cache_updates, value_cache_updates, alignment_heads_weights);
-}
\ No newline at end of file